From cac40a6a8563da3f91297be6757b760dd41ac2ea Mon Sep 17 00:00:00 2001 From: Ferdinand Majerech Date: Fri, 25 Jul 2014 03:36:55 +0200 Subject: [PATCH] scanTag scans into a slice,tag handle/suffix are split by valueDivider, not \0 --- source/dyaml/parser.d | 22 ++++++++++++++-------- source/dyaml/scanner.d | 32 +++++++++++++++----------------- source/dyaml/token.d | 15 +++++++++------ 3 files changed, 38 insertions(+), 31 deletions(-) diff --git a/source/dyaml/parser.d b/source/dyaml/parser.d index 8f59dde..28a67d5 100644 --- a/source/dyaml/parser.d +++ b/source/dyaml/parser.d @@ -425,6 +425,8 @@ final class Parser string tag = null; Mark startMark, endMark, tagMark; bool invalidMarks = true; + // The index in the tag string where tag handle ends and tag suffix starts. + uint tagHandleEnd; //Get anchor/tag if detected. Return false otherwise. bool get(const TokenID id, const Flag!"first" first, ref string target) @@ -433,7 +435,11 @@ final class Parser invalidMarks = false; immutable token = scanner_.getToken(); if(first){startMark = token.startMark;} - if(id == TokenID.Tag){tagMark = token.startMark;} + if(id == TokenID.Tag) + { + tagMark = token.startMark; + tagHandleEnd = token.valueDivider; + } endMark = token.endMark; target = token.value; return true; @@ -443,7 +449,7 @@ final class Parser if(get(TokenID.Anchor, Yes.first, anchor)){get(TokenID.Tag, No.first, tag);} else if(get(TokenID.Tag, Yes.first, tag)) {get(TokenID.Anchor, No.first, anchor);} - if(tag !is null){tag = processTag(tag, startMark, tagMark);} + if(tag !is null){tag = processTag(tag, tagHandleEnd, startMark, tagMark);} if(invalidMarks) { @@ -525,17 +531,17 @@ final class Parser * Process a tag string retrieved from a tag token. * * Params: tag = Tag before processing. + * handleEnd = Index in tag where tag handle ends and tag suffix + * starts. * startMark = Position of the node the tag belongs to. * tagMark = Position of the tag. */ - string processTag(const string tag, const Mark startMark, const Mark tagMark) + string processTag(const string tag, const uint handleEnd, + const Mark startMark, const Mark tagMark) const @trusted { - //Tag handle and suffix are separated by '\0'. - const parts = tag.split("\0"); - assert(parts.length == 2, "Tag data stored incorrectly in a token"); - const handle = parts[0]; - const suffix = parts[1]; + const handle = tag[0 .. handleEnd]; + const suffix = tag[handleEnd .. $]; if(handle.length > 0) { diff --git a/source/dyaml/scanner.d b/source/dyaml/scanner.d index f1547be..3fa3cb2 100644 --- a/source/dyaml/scanner.d +++ b/source/dyaml/scanner.d @@ -1064,19 +1064,19 @@ final class Scanner { const startMark = reader_.mark; dchar c = reader_.peek(1); - dstring handle; - dstring suffix; + + reader_.sliceBuilder.begin(); + scope(failure) { reader_.sliceBuilder.finish(); } + // Index where tag handle ends and tag suffix starts in the tag value + // (slice) we will produce. + uint handleEnd; if(c == '<') { reader_.forward(2); - reader_.sliceBuilder.begin(); - { - scope(failure) { reader_.sliceBuilder.finish(); } - scanTagURIToSlice("tag", startMark); - } - suffix = reader_.sliceBuilder.finish(); + handleEnd = 0; + scanTagURIToSlice("tag", startMark); enforce(reader_.peek() == '>', new Error("While scanning a tag", startMark, "expected '>' but found" ~ reader_.peek().to!string, @@ -1085,8 +1085,9 @@ final class Scanner } else if(" \t\0\n\r\u0085\u2028\u2029"d.canFind(c)) { - suffix = "!"d.dup; reader_.forward(); + handleEnd = 0; + reader_.sliceBuilder.write('!'); } else { @@ -1106,28 +1107,25 @@ final class Scanner if(useHandle) { - reader_.sliceBuilder.begin(); - scope(failure) { reader_.sliceBuilder.finish(); } scanTagHandleToSlice("tag", startMark); - handle = reader_.sliceBuilder.finish(); + handleEnd = cast(uint)reader_.sliceBuilder.length; } else { - handle = "!"d.dup; reader_.forward(); + reader_.sliceBuilder.write('!'); + handleEnd = cast(uint)reader_.sliceBuilder.length; } - reader_.sliceBuilder.begin(); - scope(failure) { reader_.sliceBuilder.finish(); } scanTagURIToSlice("tag", startMark); - suffix = reader_.sliceBuilder.finish(); } enforce(" \0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek()), new Error("While scanning a tag", startMark, "expected ' ' but found" ~ reader_.peek().to!string, reader_.mark)); - return tagToken(startMark, reader_.mark, utf32To8(handle ~ '\0' ~ suffix)); + const dstring slice = reader_.sliceBuilder.finish(); + return tagToken(startMark, reader_.mark, slice.utf32To8, handleEnd); } ///Scan a block scalar token with specified style. diff --git a/source/dyaml/token.d b/source/dyaml/token.d index 2625df2..60e792f 100644 --- a/source/dyaml/token.d +++ b/source/dyaml/token.d @@ -124,13 +124,16 @@ alias simpleToken!(TokenID.FlowEntry) flowEntryToken; /// Construct a simple token with value with specified type. /// -/// Params: id = Type of the token. -/// start = Start position of the token. -/// end = End position of the token. -/// value = Value of the token. -Token simpleValueToken(TokenID id)(const Mark start, const Mark end, const string value) +/// Params: id = Type of the token. +/// start = Start position of the token. +/// end = End position of the token. +/// value = Value of the token. +/// valueDivider = A hack for TagToken to store 2 values in value; the first +/// value goes up to valueDivider, the second after it. +Token simpleValueToken(TokenID id)(const Mark start, const Mark end, const string value, + const uint valueDivider = uint.max) { - return Token(value, start, end, id); + return Token(value, start, end, id, ScalarStyle.Invalid, Encoding.init, valueDivider); } /// Alias for construction of tag token.