scanTagURI now scans to a slice, as does scanURIEscapes (renamed to scanURIEscapesToSlice), at the cost of some added complexity

This commit is contained in:
Ferdinand Majerech 2014-07-25 02:34:53 +02:00
parent 457cabbb72
commit 817dc3b610

View file

@ -992,12 +992,12 @@ final class Scanner
}
/// Scan prefix of a tag directive.
dchar[] scanTagDirectivePrefix(const Mark startMark) @safe pure
dstring scanTagDirectivePrefix(const Mark startMark) @safe pure
{
auto value = scanTagURI("directive", startMark);
enforce(" \0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek()),
new Error("While scanning a directive prefix", startMark,
"expected ' ', but found" ~ to!string(reader_.peek()),
"expected ' ', but found" ~ reader_.peek().to!string,
reader_.mark));
return value;
@ -1055,12 +1055,12 @@ final class Scanner
}
/// Scan a tag token.
Token scanTag() @safe pure
Token scanTag() @trusted pure
{
const startMark = reader_.mark;
dchar c = reader_.peek(1);
dchar[] handle;
dchar[] suffix;
dstring handle;
dstring suffix;
if(c == '<')
{
@ -1699,48 +1699,80 @@ final class Scanner
}
/// Scan URI in a tag token.
///
/// Collects a run of URI characters (alphanumerics plus the punctuation set
/// listed in the loop condition) starting at the reader's current position.
/// '%' characters are handed to the URI escape scanner.
///
/// Params:  name      = Used in the error message ("While parsing a " ~ name).
///          startMark = Passed to the Error as the position where scanning began.
///
/// Returns: The scanned URI.
///
/// Throws:  Error if no URI characters were scanned.
///
// NOTE(review): this listing appears to be a diff rendered without +/- markers,
// so the pre-change (appender_-based) and post-change (sliceBuilder-based)
// versions of several statements sit next to each other, including the two
// signatures directly below. Confirm against the real file before editing.
dchar[] scanTagURI(const string name, const Mark startMark) @trusted pure
dstring scanTagURI(const string name, const Mark startMark) @trusted pure
{
// Note: we do not check if URI is well-formed.
// Using appender_, so clear it when we're done.
scope(exit) { appender_.clear(); }
uint length = 0;
// Slice-based variant: open a slice on the reader before scanning.
// (presumably every begin() must be paired with a finish() - TODO confirm
// against the slice builder's implementation)
reader_.sliceBuilder.begin();
dchar c = reader_.peek();
{
// If anything in this inner block throws, still finish the slice that was
// begun above before the exception propagates.
scope(failure) { reader_.sliceBuilder.finish(); }
// Number of plain URI characters peeked past but not yet fetched with get().
uint length = 0;
while(isAlphaNum(c) || "-;/?:@&=+$,_.!~*\'()[]%"d.canFind(c))
{
if(c == '%')
{
// Flush the pending plain characters before handling the escape, so the
// escape scanner starts at the '%'.
appender_.put(reader_.get(length));
auto chars = reader_.get(length);
reader_.sliceBuilder.write(chars);
length = 0;
// Hand the '%' sequence to the escape scanner. The *ToSlice variant returns
// void; it presumably writes decoded characters into the slice - TODO confirm.
appender_.put(scanURIEscapes(name, startMark));
scanURIEscapesToSlice(name, startMark);
}
else { ++length; }
// Look at the next candidate character, `length` positions ahead.
c = reader_.peek(length);
}
// Flush whatever plain characters remain after the loop ends.
if(length > 0)
{
appender_.put(reader_.get(length));
auto chars = reader_.get(length);
reader_.sliceBuilder.write(chars);
length = 0;
}
enforce(appender_.data.length > 0,
}
// Close the slice; an empty result means no URI characters were found.
dstring result = reader_.sliceBuilder.finish();
enforce(!result.empty,
new Error("While parsing a " ~ name, startMark,
"expected URI, but found: " ~ c.to!string, reader_.mark));
return appender_.data;
return result;
}
/// Scan URI escape sequences.
dchar[] scanURIEscapes(const string name, const Mark startMark) @system pure
void scanURIEscapesToSlice(const string name, const Mark startMark) @system pure
{
ubyte[] bytes;
// URI escapes encode a UTF-8 string. We store UTF-8 code units here for
// decoding into UTF-32.
char[4] bytes;
size_t bytesUsed;
Mark mark = reader_.mark;
// Get one dchar by decoding data from bytes.
//
// This is probably slow, but simple and URI escapes are extremely uncommon
// in YAML.
static size_t getDchar(char[] bytes, Reader reader_)
{
import std.utf;
size_t nextChar;
const c = std.utf.decode(bytes[], nextChar);
reader_.sliceBuilder.write(c);
if(bytes.length - nextChar > 0)
{
core.stdc.string.memmove(bytes.ptr, bytes.ptr + nextChar,
bytes.length - nextChar);
}
return bytes.length - nextChar;
}
try
{
while(reader_.peek() == '%')
{
reader_.forward();
if(bytesUsed == bytes.length)
{
bytesUsed = getDchar(bytes[], reader_);
}
ubyte b = 0;
char b = 0;
uint mult = 16;
// Converting 2 hexadecimal digits to a byte.
foreach(k; 0 .. 2)
@ -1760,15 +1792,12 @@ final class Scanner
b += mult * digit;
mult /= 16;
}
bytes ~= b;
bytes[bytesUsed++] = b;
reader_.forward(2);
}
try { return to!(dchar[])(cast(string)bytes); }
catch(ConvException e)
{
throw new Error("While scanning a " ~ name, startMark, e.msg, mark);
bytesUsed = getDchar(bytes[0 .. bytesUsed], reader_);
}
catch(UTFException e)
{