scanTag scans into a slice; tag handle and suffix are split by valueDivider, not \0

2014-07-25 03:36:55 +02:00 · 2014-07-25 03:36:55 +02:00 · cac40a6a85
parent 918395ea64
commit cac40a6a85
3 changed files with 38 additions and 31 deletions
--- a/source/dyaml/parser.d
+++ b/source/dyaml/parser.d
@ -425,6 +425,8 @@ final class Parser
            string tag = null;
            Mark startMark, endMark, tagMark;
            bool invalidMarks = true;
+            // The index in the tag string where tag handle ends and tag suffix starts.
+            uint tagHandleEnd;

            //Get anchor/tag if detected. Return false otherwise.
            bool get(const TokenID id, const Flag!"first" first, ref string target)
@ -433,7 +435,11 @@ final class Parser
                invalidMarks = false;
                immutable token = scanner_.getToken();
                if(first){startMark = token.startMark;}
-                if(id == TokenID.Tag){tagMark = token.startMark;}
+                if(id == TokenID.Tag)
+                {
+                    tagMark = token.startMark;
+                    tagHandleEnd = token.valueDivider;
+                }
                endMark = token.endMark; 
                target  = token.value;
                return true;
@ -443,7 +449,7 @@ final class Parser
            if(get(TokenID.Anchor, Yes.first, anchor)){get(TokenID.Tag, No.first, tag);}
            else if(get(TokenID.Tag, Yes.first, tag)) {get(TokenID.Anchor, No.first, anchor);}

-            if(tag !is null){tag = processTag(tag, startMark, tagMark);}
+            if(tag !is null){tag = processTag(tag, tagHandleEnd, startMark, tagMark);}

            if(invalidMarks)
            {
@ -525,17 +531,17 @@ final class Parser
         * Process a tag string retrieved from a tag token.
         *
         * Params:  tag       = Tag before processing.
+         *          handleEnd = Index in tag where tag handle ends and tag suffix
+         *                      starts.
         *          startMark = Position of the node the tag belongs to.
         *          tagMark   = Position of the tag.
         */ 
-        string processTag(const string tag, const Mark startMark, const Mark tagMark)
+        string processTag(const string tag, const uint handleEnd,
+                          const Mark startMark, const Mark tagMark)
            const @trusted
        {
-            //Tag handle and suffix are separated by '\0'.
-            const parts = tag.split("\0");
-            assert(parts.length == 2, "Tag data stored incorrectly in a token");
-            const handle = parts[0];
-            const suffix = parts[1];
+            const handle = tag[0 .. handleEnd];
+            const suffix = tag[handleEnd .. $];

            if(handle.length > 0)
            {
--- a/source/dyaml/scanner.d
+++ b/source/dyaml/scanner.d
@ -1064,19 +1064,19 @@ final class Scanner
        {
            const startMark = reader_.mark;
            dchar c = reader_.peek(1);
-            dstring handle;
-            dstring suffix;
+
+            reader_.sliceBuilder.begin();
+            scope(failure) { reader_.sliceBuilder.finish(); }
+            // Index where tag handle ends and tag suffix starts in the tag value
+            // (slice) we will produce.
+            uint handleEnd;

            if(c == '<')
            {
                reader_.forward(2);

-                reader_.sliceBuilder.begin();
-                {
-                    scope(failure) { reader_.sliceBuilder.finish(); }
-                    scanTagURIToSlice("tag", startMark);
-                }
-                suffix = reader_.sliceBuilder.finish();
+                handleEnd = 0;
+                scanTagURIToSlice("tag", startMark);
                enforce(reader_.peek() == '>',
                        new Error("While scanning a tag", startMark,
                                  "expected '>' but found" ~ reader_.peek().to!string,
@ -1085,8 +1085,9 @@ final class Scanner
            }
            else if(" \t\0\n\r\u0085\u2028\u2029"d.canFind(c))
            {
-                suffix = "!"d.dup;
                reader_.forward();
+                handleEnd = 0;
+                reader_.sliceBuilder.write('!');
            }
            else
            {
@ -1106,28 +1107,25 @@ final class Scanner

                if(useHandle)
                {
-                    reader_.sliceBuilder.begin();
-                    scope(failure) { reader_.sliceBuilder.finish(); }
                    scanTagHandleToSlice("tag", startMark);
-                    handle = reader_.sliceBuilder.finish();
+                    handleEnd = cast(uint)reader_.sliceBuilder.length;
                }
                else
                {
-                    handle = "!"d.dup;
                    reader_.forward();
+                    reader_.sliceBuilder.write('!');
+                    handleEnd = cast(uint)reader_.sliceBuilder.length;
                }

-                reader_.sliceBuilder.begin();
-                scope(failure) { reader_.sliceBuilder.finish(); }
                scanTagURIToSlice("tag", startMark);
-                suffix = reader_.sliceBuilder.finish();
            }

            enforce(" \0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek()),
                    new Error("While scanning a tag", startMark,
                              "expected ' ' but found" ~ reader_.peek().to!string,
                              reader_.mark));
-            return tagToken(startMark, reader_.mark, utf32To8(handle ~ '\0' ~ suffix));
+            const dstring slice = reader_.sliceBuilder.finish();
+            return tagToken(startMark, reader_.mark, slice.utf32To8, handleEnd);
        }

        ///Scan a block scalar token with specified style.
--- a/source/dyaml/token.d
+++ b/source/dyaml/token.d
@ -124,13 +124,16 @@ alias simpleToken!(TokenID.FlowEntry)          flowEntryToken;

 /// Construct a simple token with value with specified type.
 ///
-/// Params:  id    = Type of the token.
-///          start = Start position of the token.
-///          end   = End position of the token.
-///          value = Value of the token.
-Token simpleValueToken(TokenID id)(const Mark start, const Mark end, const string value)
+/// Params:  id           = Type of the token.
+///          start        = Start position of the token.
+///          end          = End position of the token.
+///          value        = Value of the token.
+///          valueDivider = A hack for TagToken to store 2 values in value; the first
+///                         value goes up to valueDivider, the second after it.
+Token simpleValueToken(TokenID id)(const Mark start, const Mark end, const string value,
+                                   const uint valueDivider = uint.max)
 {
-    return Token(value, start, end, id);
+    return Token(value, start, end, id, ScalarStyle.Invalid, Encoding.init, valueDivider);
 }

 /// Alias for construction of tag token.