scanTagURI now scans to a slice, as does scanURIEscapes, at cost of complexity
This commit is contained in:
parent
457cabbb72
commit
817dc3b610
|
@ -991,13 +991,13 @@ final class Scanner
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
///Scan prefix of a tag directive.
|
/// Scan prefix of a tag directive.
|
||||||
dchar[] scanTagDirectivePrefix(const Mark startMark) @safe pure
|
dstring scanTagDirectivePrefix(const Mark startMark) @safe pure
|
||||||
{
|
{
|
||||||
auto value = scanTagURI("directive", startMark);
|
auto value = scanTagURI("directive", startMark);
|
||||||
enforce(" \0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek()),
|
enforce(" \0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek()),
|
||||||
new Error("While scanning a directive prefix", startMark,
|
new Error("While scanning a directive prefix", startMark,
|
||||||
"expected ' ', but found" ~ to!string(reader_.peek()),
|
"expected ' ', but found" ~ reader_.peek().to!string,
|
||||||
reader_.mark));
|
reader_.mark));
|
||||||
|
|
||||||
return value;
|
return value;
|
||||||
|
@ -1054,13 +1054,13 @@ final class Scanner
|
||||||
assert(false, "This code should never be reached");
|
assert(false, "This code should never be reached");
|
||||||
}
|
}
|
||||||
|
|
||||||
///Scan a tag token.
|
/// Scan a tag token.
|
||||||
Token scanTag() @safe pure
|
Token scanTag() @trusted pure
|
||||||
{
|
{
|
||||||
const startMark = reader_.mark;
|
const startMark = reader_.mark;
|
||||||
dchar c = reader_.peek(1);
|
dchar c = reader_.peek(1);
|
||||||
dchar[] handle;
|
dstring handle;
|
||||||
dchar[] suffix;
|
dstring suffix;
|
||||||
|
|
||||||
if(c == '<')
|
if(c == '<')
|
||||||
{
|
{
|
||||||
|
@ -1699,76 +1699,105 @@ final class Scanner
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Scan URI in a tag token.
|
/// Scan URI in a tag token.
|
||||||
dchar[] scanTagURI(const string name, const Mark startMark) @trusted pure
|
dstring scanTagURI(const string name, const Mark startMark) @trusted pure
|
||||||
{
|
{
|
||||||
// Note: we do not check if URI is well-formed.
|
// Note: we do not check if URI is well-formed.
|
||||||
// Using appender_, so clear it when we're done.
|
|
||||||
scope(exit) { appender_.clear(); }
|
|
||||||
uint length = 0;
|
|
||||||
|
|
||||||
|
reader_.sliceBuilder.begin();
|
||||||
dchar c = reader_.peek();
|
dchar c = reader_.peek();
|
||||||
while(isAlphaNum(c) || "-;/?:@&=+$,_.!~*\'()[]%"d.canFind(c))
|
|
||||||
{
|
{
|
||||||
if(c == '%')
|
scope(failure) { reader_.sliceBuilder.finish(); }
|
||||||
|
uint length = 0;
|
||||||
|
while(isAlphaNum(c) || "-;/?:@&=+$,_.!~*\'()[]%"d.canFind(c))
|
||||||
{
|
{
|
||||||
appender_.put(reader_.get(length));
|
if(c == '%')
|
||||||
length = 0;
|
{
|
||||||
appender_.put(scanURIEscapes(name, startMark));
|
auto chars = reader_.get(length);
|
||||||
|
reader_.sliceBuilder.write(chars);
|
||||||
|
length = 0;
|
||||||
|
scanURIEscapesToSlice(name, startMark);
|
||||||
|
}
|
||||||
|
else { ++length; }
|
||||||
|
c = reader_.peek(length);
|
||||||
|
}
|
||||||
|
if(length > 0)
|
||||||
|
{
|
||||||
|
auto chars = reader_.get(length);
|
||||||
|
reader_.sliceBuilder.write(chars);
|
||||||
|
length = 0;
|
||||||
}
|
}
|
||||||
else { ++length; }
|
|
||||||
c = reader_.peek(length);
|
|
||||||
}
|
}
|
||||||
if(length > 0)
|
dstring result = reader_.sliceBuilder.finish();
|
||||||
{
|
enforce(!result.empty,
|
||||||
appender_.put(reader_.get(length));
|
|
||||||
length = 0;
|
|
||||||
}
|
|
||||||
enforce(appender_.data.length > 0,
|
|
||||||
new Error("While parsing a " ~ name, startMark,
|
new Error("While parsing a " ~ name, startMark,
|
||||||
"expected URI, but found: " ~ c.to!string, reader_.mark));
|
"expected URI, but found: " ~ c.to!string, reader_.mark));
|
||||||
|
|
||||||
return appender_.data;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Scan URI escape sequences.
|
/// Scan URI escape sequences.
|
||||||
dchar[] scanURIEscapes(const string name, const Mark startMark) @system pure
|
void scanURIEscapesToSlice(const string name, const Mark startMark) @system pure
|
||||||
{
|
{
|
||||||
ubyte[] bytes;
|
// URI escapes encode a UTF-8 string. We store UTF-8 code units here for
|
||||||
|
// decoding into UTF-32.
|
||||||
|
char[4] bytes;
|
||||||
|
size_t bytesUsed;
|
||||||
Mark mark = reader_.mark;
|
Mark mark = reader_.mark;
|
||||||
|
|
||||||
while(reader_.peek() == '%')
|
// Get one dchar by decoding data from bytes.
|
||||||
|
//
|
||||||
|
// This is probably slow, but simple and URI escapes are extremely uncommon
|
||||||
|
// in YAML.
|
||||||
|
static size_t getDchar(char[] bytes, Reader reader_)
|
||||||
{
|
{
|
||||||
reader_.forward();
|
import std.utf;
|
||||||
|
size_t nextChar;
|
||||||
ubyte b = 0;
|
const c = std.utf.decode(bytes[], nextChar);
|
||||||
uint mult = 16;
|
reader_.sliceBuilder.write(c);
|
||||||
// Converting 2 hexadecimal digits to a byte.
|
if(bytes.length - nextChar > 0)
|
||||||
foreach(k; 0 .. 2)
|
|
||||||
{
|
{
|
||||||
const dchar c = reader_.peek(k);
|
core.stdc.string.memmove(bytes.ptr, bytes.ptr + nextChar,
|
||||||
enforce(isHexDigit(c),
|
bytes.length - nextChar);
|
||||||
new Error("While scanning a " ~ name, startMark,
|
|
||||||
"expected URI escape sequence of "
|
|
||||||
"2 hexadecimal numbers, but found: " ~
|
|
||||||
c.to!string, reader_.mark));
|
|
||||||
|
|
||||||
uint digit;
|
|
||||||
if(c - '0' < 10) { digit = c - '0'; }
|
|
||||||
else if(c - 'A' < 6) { digit = c - 'A'; }
|
|
||||||
else if(c - 'a' < 6) { digit = c - 'a'; }
|
|
||||||
else { assert(false); }
|
|
||||||
b += mult * digit;
|
|
||||||
mult /= 16;
|
|
||||||
}
|
}
|
||||||
bytes ~= b;
|
return bytes.length - nextChar;
|
||||||
|
|
||||||
reader_.forward(2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
try { return to!(dchar[])(cast(string)bytes); }
|
try
|
||||||
catch(ConvException e)
|
|
||||||
{
|
{
|
||||||
throw new Error("While scanning a " ~ name, startMark, e.msg, mark);
|
while(reader_.peek() == '%')
|
||||||
|
{
|
||||||
|
reader_.forward();
|
||||||
|
if(bytesUsed == bytes.length)
|
||||||
|
{
|
||||||
|
bytesUsed = getDchar(bytes[], reader_);
|
||||||
|
}
|
||||||
|
|
||||||
|
char b = 0;
|
||||||
|
uint mult = 16;
|
||||||
|
// Converting 2 hexadecimal digits to a byte.
|
||||||
|
foreach(k; 0 .. 2)
|
||||||
|
{
|
||||||
|
const dchar c = reader_.peek(k);
|
||||||
|
enforce(isHexDigit(c),
|
||||||
|
new Error("While scanning a " ~ name, startMark,
|
||||||
|
"expected URI escape sequence of "
|
||||||
|
"2 hexadecimal numbers, but found: " ~
|
||||||
|
c.to!string, reader_.mark));
|
||||||
|
|
||||||
|
uint digit;
|
||||||
|
if(c - '0' < 10) { digit = c - '0'; }
|
||||||
|
else if(c - 'A' < 6) { digit = c - 'A'; }
|
||||||
|
else if(c - 'a' < 6) { digit = c - 'a'; }
|
||||||
|
else { assert(false); }
|
||||||
|
b += mult * digit;
|
||||||
|
mult /= 16;
|
||||||
|
}
|
||||||
|
bytes[bytesUsed++] = b;
|
||||||
|
|
||||||
|
reader_.forward(2);
|
||||||
|
}
|
||||||
|
|
||||||
|
bytesUsed = getDchar(bytes[0 .. bytesUsed], reader_);
|
||||||
}
|
}
|
||||||
catch(UTFException e)
|
catch(UTFException e)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in a new issue