Flow scalar tokens are now scanned into slices.

This commit is contained in:
Ferdinand Majerech 2014-07-24 18:46:21 +02:00
parent 706ce872b0
commit 039d601a5f

View file

@ -1273,77 +1273,82 @@ final class Scanner
} }
/// Scan a quoted flow scalar token with specified quotes. /// Scan a quoted flow scalar token with specified quotes.
Token scanFlowScalar(const ScalarStyle quotes) @safe pure Token scanFlowScalar(const ScalarStyle quotes) @trusted pure
{ {
const startMark = reader_.mark; const startMark = reader_.mark;
const quote = reader_.get(); const quote = reader_.get();
// Using appender_, so clear it when we're done. reader_.sliceBuilder.begin();
scope(exit) { appender_.clear(); } //XXX remove once nothrow
scope(failure) { reader_.sliceBuilder.finish(); }
scope(exit) { if(error_) {reader_.sliceBuilder.finish();}}
// Puts scanned data to appender_. scanFlowScalarNonSpacesToSlice(quotes, startMark);
scanFlowScalarNonSpaces(quotes, startMark);
while(reader_.peek() != quote) while(reader_.peek() != quote)
{ {
// Puts scanned data to appender_. scanFlowScalarSpacesToSlice(startMark);
scanFlowScalarSpaces(startMark); scanFlowScalarNonSpacesToSlice(quotes, startMark);
scanFlowScalarNonSpaces(quotes, startMark);
} }
reader_.forward(); reader_.forward();
return scalarToken(startMark, reader_.mark, utf32To8(appender_.data), quotes); auto slice = reader_.sliceBuilder.finish();
return scalarToken(startMark, reader_.mark, slice.utf32To8, quotes);
} }
///Scan nonspace characters in a flow scalar. /// Scan nonspace characters in a flow scalar.
void scanFlowScalarNonSpaces(const ScalarStyle quotes, const Mark startMark) ///
@safe pure /// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
void scanFlowScalarNonSpacesToSlice(const ScalarStyle quotes, const Mark startMark)
@system pure
{ {
for(;;) for(;;) with(ScalarStyle)
{ {
dchar c = reader_.peek(); dchar c = reader_.peek();
uint length = 0; uint length = 0;
mixin FastCharSearch!" \t\0\n\r\u0085\u2028\u2029\'\"\\"d search; mixin FastCharSearch!" \t\0\n\r\u0085\u2028\u2029\'\"\\"d search;
//This is an optimized way of writing: // This is an optimized way of writing:
//while(!search.canFind(reader_.peek(length))){++length;} // while(!search.canFind(reader_.peek(length))){++length;}
outer: for(;;) outer: for(;;)
{ {
const slice = reader_.slice(length, length + 32); const slice = reader_.slice(length, length + 32);
// XXX will be thrown by parent
enforce(slice.length > 0, enforce(slice.length > 0,
new Error("While reading a flow scalar", startMark, new Error("While reading a flow scalar", startMark,
"reached end of file", reader_.mark)); "reached end of file", reader_.mark));
foreach(ch; slice) foreach(ch; slice)
{ {
if(search.canFind(ch)){break outer;} if(search.canFind(ch)) { break outer; }
++length; ++length;
} }
} }
appender_.put(reader_.prefix(length)); reader_.sliceBuilder.write(reader_.get(length));
reader_.forward(length);
c = reader_.peek(); c = reader_.peek();
if(quotes == ScalarStyle.SingleQuoted && if(quotes == SingleQuoted && c == '\'' && reader_.peek(1) == '\'')
c == '\'' && reader_.peek(1) == '\'')
{ {
appender_.put('\'');
reader_.forward(2); reader_.forward(2);
reader_.sliceBuilder.write('\'');
} }
else if((quotes == ScalarStyle.DoubleQuoted && c == '\'') || else if((quotes == DoubleQuoted && c == '\'') ||
(quotes == ScalarStyle.SingleQuoted && "\"\\"d.canFind(c))) (quotes == SingleQuoted && "\"\\"d.canFind(c)))
{ {
appender_.put(c);
reader_.forward(); reader_.forward();
reader_.sliceBuilder.write(c);
} }
else if(quotes == ScalarStyle.DoubleQuoted && c == '\\') else if(quotes == DoubleQuoted && c == '\\')
{ {
reader_.forward(); reader_.forward();
c = reader_.peek(); c = reader_.peek();
if((c in dyaml.escapes.fromEscapes) !is null) if((c in dyaml.escapes.fromEscapes) !is null)
{ {
appender_.put(dyaml.escapes.fromEscapes[c]);
reader_.forward(); reader_.forward();
// This works because fromEscapes is dchar[dchar] - we use at
// most the space freed by the forward() call above.
reader_.sliceBuilder.write(dyaml.escapes.fromEscapes[c]);
} }
else if((c in dyaml.escapes.escapeHexCodes) !is null) else if((c in dyaml.escapes.escapeHexCodes) !is null)
{ {
@ -1355,66 +1360,79 @@ final class Scanner
enforce(isHexDigit(reader_.peek(i)), enforce(isHexDigit(reader_.peek(i)),
new Error( new Error(
"While scanning a double qouted scalar", startMark, "While scanning a double qouted scalar", startMark,
"expected escape sequence of " ~ to!string(length) ~ "expected escape sequence of " ~ length.to!string ~
" hexadecimal numbers, but found " ~ " hexadecimal numbers, but found " ~
to!string(reader_.peek(i)), reader_.mark)); reader_.peek(i).to!string, reader_.mark));
} }
dchar[] hex = reader_.get(length); dchar[] hex = reader_.get(length);
appender_.put(cast(dchar)parse!int(hex, 16)); reader_.sliceBuilder.write(cast(dchar)parse!int(hex, 16));
} }
else if("\n\r\u0085\u2028\u2029"d.canFind(c)) else if("\n\r\u0085\u2028\u2029"d.canFind(c))
{ {
scanLineBreak(); scanLineBreak();
appender_.put(scanFlowScalarBreaks(startMark)); scanFlowScalarBreaksToSlice(startMark);
throwIfError();
} }
else else
{ {
throw new Error("While scanning a double quoted scalar", startMark, throw new Error("While scanning a double quoted scalar", startMark,
"found unknown escape character: " ~ to!string(c), "found unknown escape character: " ~ c.to!string,
reader_.mark); reader_.mark);
} }
} }
else else { return; }
{
return;
}
} }
} }
/// Scan space characters in a flow scalar. /// Scan space characters in a flow scalar.
void scanFlowScalarSpaces(const Mark startMark) @safe pure ///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// spaces into that slice.
void scanFlowScalarSpacesToSlice(const Mark startMark) @system pure
{ {
// Increase length as long as we see whitespace. // Increase length as long as we see whitespace.
uint length = 0; size_t length = 0;
while(" \t"d.canFind(reader_.peek(length))) { ++length; } while(" \t"d.canFind(reader_.peek(length))) { ++length; }
const whitespaces = reader_.prefix(length + 1); auto whitespaces = reader_.prefix(length + 1);
const c = whitespaces[$ - 1]; const c = whitespaces[$ - 1];
enforce(c != '\0', new Error("While scanning a quoted scalar", startMark, enforce(c != '\0', new Error("While scanning a quoted scalar", startMark,
"found unexpected end of buffer", reader_.mark)); "found unexpected end of buffer", reader_.mark));
if("\n\r\u0085\u2028\u2029"d.canFind(c)) // Spaces not followed by a line break.
if(!"\n\r\u0085\u2028\u2029"d.canFind(c))
{ {
reader_.forward(length); reader_.forward(length);
const lineBreak = scanLineBreak(); reader_.sliceBuilder.write(whitespaces[0 .. $ - 1]);
const breaks = scanFlowScalarBreaks(startMark); return;
}
if(lineBreak != '\n') { appender_.put(lineBreak); } // There's a line break after the spaces.
else if(breaks.length == 0) { appender_.put(' '); } reader_.forward(length);
appender_.put(breaks); const lineBreak = scanLineBreak();
}
else if(lineBreak != '\n') { reader_.sliceBuilder.write(lineBreak); }
{
appender_.put(whitespaces[0 .. $ - 1]); // If we have extra line breaks after the first, scan them into the
reader_.forward(length); // slice.
} const bool extraBreaks = scanFlowScalarBreaksToSlice(startMark);
throwIfError();
// No extra breaks, one normal line break. Replace it with a space.
if(lineBreak == '\n' && !extraBreaks) { reader_.sliceBuilder.write(' '); }
} }
/// Scan line breaks in a flow scalar. /// Scan line breaks in a flow scalar.
dchar[] scanFlowScalarBreaks(const Mark startMark) @safe pure ///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// line breaks into that slice.
///
/// In case of an error, error_ is set. Check this before using the result.
bool scanFlowScalarBreaksToSlice(const Mark startMark)
@system pure nothrow @nogc
{ {
auto appender = appender!(dchar[])(); // True if at least one line break was found.
bool anyBreaks;
for(;;) for(;;)
{ {
// Instead of checking indentation, we check for document separators. // Instead of checking indentation, we check for document separators.
@ -1422,18 +1440,22 @@ final class Scanner
if((prefix == "---"d || prefix == "..."d) && if((prefix == "---"d || prefix == "..."d) &&
" \t\0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek(3))) " \t\0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek(3)))
{ {
throw new Error("While scanning a quoted scalar", startMark, setError("While scanning a quoted scalar", startMark,
"found unexpected document separator", reader_.mark); "found unexpected document separator", reader_.mark);
return false;
} }
// Skip any whitespaces.
while(" \t"d.canFind(reader_.peek())) { reader_.forward(); } while(" \t"d.canFind(reader_.peek())) { reader_.forward(); }
if("\n\r\u0085\u2028\u2029"d.canFind(reader_.peek())) // Encountered a non-whitespace non-linebreak character, so we're done.
{ if(!"\n\r\u0085\u2028\u2029"d.canFind(reader_.peek())) { break; }
appender.put(scanLineBreak());
} const lineBreak = scanLineBreak();
else { return appender.data; } anyBreaks = true;
reader_.sliceBuilder.write(lineBreak);
} }
return anyBreaks;
} }
/// Scan plain scalar token (no block, no quotes). /// Scan plain scalar token (no block, no quotes).