scanTag scans into a slice, tag handle/suffix are split by valueDivider, not \0

This commit is contained in:
Ferdinand Majerech 2014-07-25 03:36:55 +02:00
parent 918395ea64
commit cac40a6a85
3 changed files with 38 additions and 31 deletions

View file

@ -425,6 +425,8 @@ final class Parser
string tag = null; string tag = null;
Mark startMark, endMark, tagMark; Mark startMark, endMark, tagMark;
bool invalidMarks = true; bool invalidMarks = true;
// The index in the tag string where tag handle ends and tag suffix starts.
uint tagHandleEnd;
//Get anchor/tag if detected. Return false otherwise. //Get anchor/tag if detected. Return false otherwise.
bool get(const TokenID id, const Flag!"first" first, ref string target) bool get(const TokenID id, const Flag!"first" first, ref string target)
@ -433,7 +435,11 @@ final class Parser
invalidMarks = false; invalidMarks = false;
immutable token = scanner_.getToken(); immutable token = scanner_.getToken();
if(first){startMark = token.startMark;} if(first){startMark = token.startMark;}
if(id == TokenID.Tag){tagMark = token.startMark;} if(id == TokenID.Tag)
{
tagMark = token.startMark;
tagHandleEnd = token.valueDivider;
}
endMark = token.endMark; endMark = token.endMark;
target = token.value; target = token.value;
return true; return true;
@ -443,7 +449,7 @@ final class Parser
if(get(TokenID.Anchor, Yes.first, anchor)){get(TokenID.Tag, No.first, tag);} if(get(TokenID.Anchor, Yes.first, anchor)){get(TokenID.Tag, No.first, tag);}
else if(get(TokenID.Tag, Yes.first, tag)) {get(TokenID.Anchor, No.first, anchor);} else if(get(TokenID.Tag, Yes.first, tag)) {get(TokenID.Anchor, No.first, anchor);}
if(tag !is null){tag = processTag(tag, startMark, tagMark);} if(tag !is null){tag = processTag(tag, tagHandleEnd, startMark, tagMark);}
if(invalidMarks) if(invalidMarks)
{ {
@ -525,17 +531,17 @@ final class Parser
* Process a tag string retrieved from a tag token. * Process a tag string retrieved from a tag token.
* *
* Params: tag = Tag before processing. * Params: tag = Tag before processing.
* handleEnd = Index in tag where tag handle ends and tag suffix
* starts.
* startMark = Position of the node the tag belongs to. * startMark = Position of the node the tag belongs to.
* tagMark = Position of the tag. * tagMark = Position of the tag.
*/ */
string processTag(const string tag, const Mark startMark, const Mark tagMark) string processTag(const string tag, const uint handleEnd,
const Mark startMark, const Mark tagMark)
const @trusted const @trusted
{ {
//Tag handle and suffix are separated by '\0'. const handle = tag[0 .. handleEnd];
const parts = tag.split("\0"); const suffix = tag[handleEnd .. $];
assert(parts.length == 2, "Tag data stored incorrectly in a token");
const handle = parts[0];
const suffix = parts[1];
if(handle.length > 0) if(handle.length > 0)
{ {

View file

@ -1064,19 +1064,19 @@ final class Scanner
{ {
const startMark = reader_.mark; const startMark = reader_.mark;
dchar c = reader_.peek(1); dchar c = reader_.peek(1);
dstring handle;
dstring suffix; reader_.sliceBuilder.begin();
scope(failure) { reader_.sliceBuilder.finish(); }
// Index where tag handle ends and tag suffix starts in the tag value
// (slice) we will produce.
uint handleEnd;
if(c == '<') if(c == '<')
{ {
reader_.forward(2); reader_.forward(2);
reader_.sliceBuilder.begin(); handleEnd = 0;
{ scanTagURIToSlice("tag", startMark);
scope(failure) { reader_.sliceBuilder.finish(); }
scanTagURIToSlice("tag", startMark);
}
suffix = reader_.sliceBuilder.finish();
enforce(reader_.peek() == '>', enforce(reader_.peek() == '>',
new Error("While scanning a tag", startMark, new Error("While scanning a tag", startMark,
"expected '>' but found" ~ reader_.peek().to!string, "expected '>' but found" ~ reader_.peek().to!string,
@ -1085,8 +1085,9 @@ final class Scanner
} }
else if(" \t\0\n\r\u0085\u2028\u2029"d.canFind(c)) else if(" \t\0\n\r\u0085\u2028\u2029"d.canFind(c))
{ {
suffix = "!"d.dup;
reader_.forward(); reader_.forward();
handleEnd = 0;
reader_.sliceBuilder.write('!');
} }
else else
{ {
@ -1106,28 +1107,25 @@ final class Scanner
if(useHandle) if(useHandle)
{ {
reader_.sliceBuilder.begin();
scope(failure) { reader_.sliceBuilder.finish(); }
scanTagHandleToSlice("tag", startMark); scanTagHandleToSlice("tag", startMark);
handle = reader_.sliceBuilder.finish(); handleEnd = cast(uint)reader_.sliceBuilder.length;
} }
else else
{ {
handle = "!"d.dup;
reader_.forward(); reader_.forward();
reader_.sliceBuilder.write('!');
handleEnd = cast(uint)reader_.sliceBuilder.length;
} }
reader_.sliceBuilder.begin();
scope(failure) { reader_.sliceBuilder.finish(); }
scanTagURIToSlice("tag", startMark); scanTagURIToSlice("tag", startMark);
suffix = reader_.sliceBuilder.finish();
} }
enforce(" \0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek()), enforce(" \0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek()),
new Error("While scanning a tag", startMark, new Error("While scanning a tag", startMark,
"expected ' ' but found" ~ reader_.peek().to!string, "expected ' ' but found" ~ reader_.peek().to!string,
reader_.mark)); reader_.mark));
return tagToken(startMark, reader_.mark, utf32To8(handle ~ '\0' ~ suffix)); const dstring slice = reader_.sliceBuilder.finish();
return tagToken(startMark, reader_.mark, slice.utf32To8, handleEnd);
} }
///Scan a block scalar token with specified style. ///Scan a block scalar token with specified style.

View file

@ -124,13 +124,16 @@ alias simpleToken!(TokenID.FlowEntry) flowEntryToken;
/// Construct a simple token with value with specified type. /// Construct a simple token with value with specified type.
/// ///
/// Params: id = Type of the token. /// Params: id = Type of the token.
/// start = Start position of the token. /// start = Start position of the token.
/// end = End position of the token. /// end = End position of the token.
/// value = Value of the token. /// value = Value of the token.
Token simpleValueToken(TokenID id)(const Mark start, const Mark end, const string value) /// valueDivider = A hack for TagToken to store 2 values in value; the first
/// value goes up to valueDivider, the second after it.
Token simpleValueToken(TokenID id)(const Mark start, const Mark end, const string value,
const uint valueDivider = uint.max)
{ {
return Token(value, start, end, id); return Token(value, start, end, id, ScalarStyle.Invalid, Encoding.init, valueDivider);
} }
/// Alias for construction of tag token. /// Alias for construction of tag token.