scanTagURI now scans to a slice, as does scanURIEscapes, at the cost of complexity

2014-07-25 02:34:53 +02:00 · 2014-07-25 02:34:53 +02:00 · 817dc3b610
commit 817dc3b610
parent 457cabbb72
1 changed files with 83 additions and 54 deletions
--- a/source/dyaml/scanner.d
+++ b/source/dyaml/scanner.d
@ -991,13 +991,13 @@ final class Scanner
            return value;
        }
-        ///Scan prefix of a tag directive.
+        /// Scan prefix of a tag directive.
-        dchar[] scanTagDirectivePrefix(const Mark startMark) @safe pure
+        dstring scanTagDirectivePrefix(const Mark startMark) @safe pure
        {
            auto value = scanTagURI("directive", startMark);
            enforce(" \0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek()),
                    new Error("While scanning a directive prefix", startMark,
-                              "expected ' ', but found" ~ to!string(reader_.peek()),
+                              "expected ' ', but found" ~ reader_.peek().to!string,
                              reader_.mark));
            return value;
@ -1054,13 +1054,13 @@ final class Scanner
            assert(false, "This code should never be reached");
        }
-        ///Scan a tag token.
+        /// Scan a tag token.
-        Token scanTag() @safe pure
+        Token scanTag() @trusted pure
        {
            const startMark = reader_.mark;
            dchar c = reader_.peek(1);
-            dchar[] handle;
+            dstring handle;
-            dchar[] suffix;
+            dstring suffix;
            if(c == '<')
            {
@ -1699,48 +1699,80 @@ final class Scanner
        }
        /// Scan URI in a tag token.
-        dchar[] scanTagURI(const string name, const Mark startMark) @trusted pure
+        dstring scanTagURI(const string name, const Mark startMark) @trusted pure
        {
            // Note: we do not check if URI is well-formed.
            // Using appender_, so clear it when we're done.
            scope(exit) { appender_.clear(); }
            uint length = 0;
            reader_.sliceBuilder.begin();
            dchar c = reader_.peek();
            {
                scope(failure) { reader_.sliceBuilder.finish(); }
                uint length = 0;
                while(isAlphaNum(c) || "-;/?:@&=+$,_.!~*\'()[]%"d.canFind(c))
                {
                    if(c == '%')
                    {
-                    appender_.put(reader_.get(length));
+                        auto chars = reader_.get(length);
                        reader_.sliceBuilder.write(chars);
                        length = 0;
-                    appender_.put(scanURIEscapes(name, startMark));
+                        scanURIEscapesToSlice(name, startMark);
                    }
                    else { ++length; }
                    c = reader_.peek(length);
                }
                if(length > 0)
                {
-                appender_.put(reader_.get(length));
+                    auto chars = reader_.get(length);
                    reader_.sliceBuilder.write(chars);
                    length = 0;
                }
-            enforce(appender_.data.length > 0,
+            }
            dstring result = reader_.sliceBuilder.finish();
            enforce(!result.empty,
                    new Error("While parsing a " ~ name, startMark,
                              "expected URI, but found: " ~ c.to!string, reader_.mark));
-            return appender_.data;
+            return result;
        }
        /// Scan URI escape sequences.
-        dchar[] scanURIEscapes(const string name, const Mark startMark) @system pure
+        void scanURIEscapesToSlice(const string name, const Mark startMark) @system pure
        {
-            ubyte[] bytes;
+            // URI escapes encode a UTF-8 string. We store UTF-8 code units here for
            // decoding into UTF-32.
            char[4] bytes;
            size_t bytesUsed;
            Mark mark = reader_.mark;
            // Get one dchar by decoding data from bytes.
            //
            // This is probably slow, but simple and URI escapes are extremely uncommon
            // in YAML.
            static size_t getDchar(char[] bytes, Reader reader_)
            {
                import std.utf;
                size_t nextChar;
                const c = std.utf.decode(bytes[], nextChar); 
                reader_.sliceBuilder.write(c);
                if(bytes.length - nextChar > 0)
                {
                    core.stdc.string.memmove(bytes.ptr, bytes.ptr + nextChar, 
                                             bytes.length - nextChar);
                }
                return bytes.length - nextChar;
            }
            try 
            {
                while(reader_.peek() == '%')
                {
                    reader_.forward();
                    if(bytesUsed == bytes.length)
                    {
                        bytesUsed = getDchar(bytes[], reader_); 
                    }
-                ubyte b = 0;
+                    char b = 0;
                    uint mult = 16;
                    // Converting 2 hexadecimal digits to a byte.
                    foreach(k; 0 .. 2)
@ -1760,15 +1792,12 @@ final class Scanner
                        b += mult * digit;
                        mult /= 16;
                    }
-                bytes ~= b;
+                    bytes[bytesUsed++] = b;
                    reader_.forward(2);
                }
-            try { return to!(dchar[])(cast(string)bytes); }
+                bytesUsed = getDchar(bytes[0 .. bytesUsed], reader_);
            catch(ConvException e)
            {
                throw new Error("While scanning a " ~ name, startMark, e.msg, mark);
            }
            catch(UTFException e)
            {