Block scalar scanning now works with UTF-8.
This commit is contained in:
parent
19ed03cb3e
commit
6837156258
|
@ -853,14 +853,14 @@ final class Scanner
|
||||||
///
|
///
|
||||||
/// Assumes that the caller is building a slice in Reader, and puts the scanned
|
/// Assumes that the caller is building a slice in Reader, and puts the scanned
|
||||||
/// characters into that slice.
|
/// characters into that slice.
|
||||||
void scanToNextBreakToSlice() @system pure nothrow @nogc
|
void scanToNextBreakToSlice8() @system pure nothrow @nogc
|
||||||
{
|
{
|
||||||
uint length = 0;
|
uint length = 0;
|
||||||
while(!"\0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek(length)))
|
while(!"\0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek(length)))
|
||||||
{
|
{
|
||||||
++length;
|
++length;
|
||||||
}
|
}
|
||||||
reader_.sliceBuilder.write(reader_.get(length));
|
reader_.sliceBuilder8.write(reader_.get8(length));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -1230,23 +1230,23 @@ final class Scanner
|
||||||
Mark endMark;
|
Mark endMark;
|
||||||
uint indent = max(1, indent_ + 1);
|
uint indent = max(1, indent_ + 1);
|
||||||
|
|
||||||
reader_.sliceBuilder.begin();
|
reader_.sliceBuilder8.begin();
|
||||||
alias Transaction = SliceBuilder.Transaction;
|
alias Transaction = SliceBuilder8.Transaction;
|
||||||
// Used to strip the last line breaks written to the slice at the end of the
|
// Used to strip the last line breaks written to the slice at the end of the
|
||||||
// scalar, which may be needed based on chomping.
|
// scalar, which may be needed based on chomping.
|
||||||
Transaction breaksTransaction = Transaction(reader_.sliceBuilder);
|
Transaction breaksTransaction = Transaction(reader_.sliceBuilder8);
|
||||||
// Read the first indentation/line breaks before the scalar.
|
// Read the first indentation/line breaks before the scalar.
|
||||||
size_t startLen = reader_.sliceBuilder.length;
|
size_t startLen = reader_.sliceBuilder8.length;
|
||||||
if(increment == int.min)
|
if(increment == int.min)
|
||||||
{
|
{
|
||||||
auto indentation = scanBlockScalarIndentationToSlice();
|
auto indentation = scanBlockScalarIndentationToSlice8();
|
||||||
endMark = indentation[1];
|
endMark = indentation[1];
|
||||||
indent = max(indent, indentation[0]);
|
indent = max(indent, indentation[0]);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
indent += increment - 1;
|
indent += increment - 1;
|
||||||
endMark = scanBlockScalarBreaksToSlice(indent);
|
endMark = scanBlockScalarBreaksToSlice8(indent);
|
||||||
}
|
}
|
||||||
|
|
||||||
// int.max means there's no line break (int.max is outside UTF-32).
|
// int.max means there's no line break (int.max is outside UTF-32).
|
||||||
|
@ -1258,16 +1258,17 @@ final class Scanner
|
||||||
breaksTransaction.commit();
|
breaksTransaction.commit();
|
||||||
const bool leadingNonSpace = !" \t"d.canFind(reader_.peek());
|
const bool leadingNonSpace = !" \t"d.canFind(reader_.peek());
|
||||||
// This is where the 'interesting' non-whitespace data gets read.
|
// This is where the 'interesting' non-whitespace data gets read.
|
||||||
scanToNextBreakToSlice();
|
scanToNextBreakToSlice8();
|
||||||
lineBreak = scanLineBreak();
|
lineBreak = scanLineBreak8();
|
||||||
|
|
||||||
|
|
||||||
// This transaction serves to rollback data read in the
|
// This transaction serves to rollback data read in the
|
||||||
// scanBlockScalarBreaksToSlice() call.
|
// scanBlockScalarBreaksToSlice8() call.
|
||||||
breaksTransaction = Transaction(reader_.sliceBuilder);
|
breaksTransaction = Transaction(reader_.sliceBuilder8);
|
||||||
startLen = reader_.sliceBuilder.length;
|
startLen = reader_.sliceBuilder8.length;
|
||||||
// The line breaks should actually be written _after_ the if() block
|
// The line breaks should actually be written _after_ the if() block
|
||||||
// below. We work around that by inserting lineBreak at startLen afterwards.
|
// below. We work around that by inserting lineBreak at startLen afterwards.
|
||||||
endMark = scanBlockScalarBreaksToSlice(indent);
|
endMark = scanBlockScalarBreaksToSlice8(indent);
|
||||||
|
|
||||||
// This will not run during the last iteration (see the if() vs the
|
// This will not run during the last iteration (see the if() vs the
|
||||||
// while()), hence breaksTransaction rollback (which happens after this
|
// while()), hence breaksTransaction rollback (which happens after this
|
||||||
|
@ -1282,16 +1283,16 @@ final class Scanner
|
||||||
{
|
{
|
||||||
// No breaks were scanned; no need to insert the space in the
|
// No breaks were scanned; no need to insert the space in the
|
||||||
// middle of slice.
|
// middle of slice.
|
||||||
if(startLen == reader_.sliceBuilder.length)
|
if(startLen == reader_.sliceBuilder8.length)
|
||||||
{
|
{
|
||||||
reader_.sliceBuilder.write(' ');
|
reader_.sliceBuilder8.write(' ');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// We need to insert in the middle of the slice in case any line
|
// We need to insert in the middle of the slice in case any line
|
||||||
// breaks were scanned.
|
// breaks were scanned.
|
||||||
reader_.sliceBuilder.insert(lineBreak, startLen);
|
reader_.sliceBuilder8.insert(lineBreak, startLen);
|
||||||
}
|
}
|
||||||
|
|
||||||
////this is Clark Evans's interpretation (also in the spec
|
////this is Clark Evans's interpretation (also in the spec
|
||||||
|
@ -1303,7 +1304,7 @@ final class Scanner
|
||||||
// {
|
// {
|
||||||
// if(!" \t"d.canFind(reader_.peek()))
|
// if(!" \t"d.canFind(reader_.peek()))
|
||||||
// {
|
// {
|
||||||
// reader_.sliceBuilder.write(' ');
|
// reader_.sliceBuilder8.write(' ');
|
||||||
// }
|
// }
|
||||||
// else
|
// else
|
||||||
// {
|
// {
|
||||||
|
@ -1313,7 +1314,7 @@ final class Scanner
|
||||||
//}
|
//}
|
||||||
//else
|
//else
|
||||||
//{
|
//{
|
||||||
// reader_.sliceBuilder.insertBack(lineBreak, endLen - startLen);
|
// reader_.sliceBuilder8.insertBack(lineBreak, endLen - startLen);
|
||||||
//}
|
//}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -1334,19 +1335,19 @@ final class Scanner
|
||||||
// be inserted _before_ the other line breaks.
|
// be inserted _before_ the other line breaks.
|
||||||
if(chomping == Chomping.Keep)
|
if(chomping == Chomping.Keep)
|
||||||
{
|
{
|
||||||
reader_.sliceBuilder.insert(lineBreak, startLen);
|
reader_.sliceBuilder8.insert(lineBreak, startLen);
|
||||||
}
|
}
|
||||||
// If chomping is not Keep, breaksTransaction was cancelled so we can
|
// If chomping is not Keep, breaksTransaction was cancelled so we can
|
||||||
// directly write the first line break (as it isn't stripped - chomping
|
// directly write the first line break (as it isn't stripped - chomping
|
||||||
// is not Strip)
|
// is not Strip)
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
reader_.sliceBuilder.write(lineBreak);
|
reader_.sliceBuilder8.write(lineBreak);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const slice = reader_.sliceBuilder.finish();
|
const slice = reader_.sliceBuilder8.finish();
|
||||||
return scalarToken(startMark, endMark, slice.utf32To8, style);
|
return scalarToken(startMark, endMark, slice, style);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Scan chomping and indentation indicators of a scalar token.
|
/// Scan chomping and indentation indicators of a scalar token.
|
||||||
|
@ -1439,7 +1440,7 @@ final class Scanner
|
||||||
|
|
||||||
if("\0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek()))
|
if("\0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek()))
|
||||||
{
|
{
|
||||||
scanLineBreak();
|
scanLineBreak8();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
error("While scanning a block scalar", startMark,
|
error("While scanning a block scalar", startMark,
|
||||||
|
@ -1450,7 +1451,7 @@ final class Scanner
|
||||||
///
|
///
|
||||||
/// Assumes that the caller is building a slice in Reader, and puts the scanned
|
/// Assumes that the caller is building a slice in Reader, and puts the scanned
|
||||||
/// characters into that slice.
|
/// characters into that slice.
|
||||||
Tuple!(uint, Mark) scanBlockScalarIndentationToSlice()
|
Tuple!(uint, Mark) scanBlockScalarIndentationToSlice8()
|
||||||
@system pure nothrow @nogc
|
@system pure nothrow @nogc
|
||||||
{
|
{
|
||||||
uint maxIndent;
|
uint maxIndent;
|
||||||
|
@ -1460,7 +1461,7 @@ final class Scanner
|
||||||
{
|
{
|
||||||
if(reader_.peek() != ' ')
|
if(reader_.peek() != ' ')
|
||||||
{
|
{
|
||||||
reader_.sliceBuilder.write(scanLineBreak());
|
reader_.sliceBuilder8.write(scanLineBreak8());
|
||||||
endMark = reader_.mark;
|
endMark = reader_.mark;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -1475,7 +1476,7 @@ final class Scanner
|
||||||
///
|
///
|
||||||
/// Assumes that the caller is building a slice in Reader, and puts the scanned
|
/// Assumes that the caller is building a slice in Reader, and puts the scanned
|
||||||
/// characters into that slice.
|
/// characters into that slice.
|
||||||
Mark scanBlockScalarBreaksToSlice(const uint indent) @trusted pure nothrow @nogc
|
Mark scanBlockScalarBreaksToSlice8(const uint indent) @trusted pure nothrow @nogc
|
||||||
{
|
{
|
||||||
Mark endMark = reader_.mark;
|
Mark endMark = reader_.mark;
|
||||||
|
|
||||||
|
@ -1483,7 +1484,7 @@ final class Scanner
|
||||||
{
|
{
|
||||||
while(reader_.column < indent && reader_.peek() == ' ') { reader_.forward(); }
|
while(reader_.column < indent && reader_.peek() == ' ') { reader_.forward(); }
|
||||||
if(!"\n\r\u0085\u2028\u2029"d.canFind(reader_.peek())) { break; }
|
if(!"\n\r\u0085\u2028\u2029"d.canFind(reader_.peek())) { break; }
|
||||||
reader_.sliceBuilder.write(scanLineBreak());
|
reader_.sliceBuilder8.write(scanLineBreak8());
|
||||||
endMark = reader_.mark;
|
endMark = reader_.mark;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue