Slices are now non-const in all layers up to Parser, where they get cast to string.

This commit is contained in:
Ferdinand Majerech 2014-08-02 01:58:20 +02:00
parent 7b699c5903
commit d32addacda
3 changed files with 58 additions and 49 deletions

View file

@ -104,7 +104,12 @@ class ParserException : MarkedYAMLException
private alias ParserException Error; private alias ParserException Error;
///Generates events from tokens provided by a Scanner. /// Generates events from tokens provided by a Scanner.
///
/// While Parser receives tokens with non-const character slices, the events it
/// produces are immutable strings, which are usually the same slices, cast to string.
/// Parser is the last layer of D:YAML that may modify these slices.
final class Parser final class Parser
{ {
private: private:
@ -256,7 +261,7 @@ final class Parser
///Parse stream start. ///Parse stream start.
Event parseStreamStart() @safe Event parseStreamStart() @safe
{ {
immutable token = scanner_.getToken(); const token = scanner_.getToken();
state_ = &parseImplicitDocumentStart; state_ = &parseImplicitDocumentStart;
return streamStartEvent(token.startMark, token.endMark, token.encoding); return streamStartEvent(token.startMark, token.endMark, token.encoding);
} }
@ -269,7 +274,7 @@ final class Parser
TokenID.StreamEnd)) TokenID.StreamEnd))
{ {
tagDirectives_ = defaultTagDirectives_; tagDirectives_ = defaultTagDirectives_;
immutable token = scanner_.peekToken(); const token = scanner_.peekToken();
states_ ~= &parseDocumentEnd; states_ ~= &parseDocumentEnd;
state_ = &parseBlockNode; state_ = &parseBlockNode;
@ -304,7 +309,7 @@ final class Parser
else else
{ {
//Parse the end of the stream. //Parse the end of the stream.
immutable token = scanner_.getToken(); const token = scanner_.getToken();
assert(states_.length == 0); assert(states_.length == 0);
assert(marks_.length == 0); assert(marks_.length == 0);
state_ = null; state_ = null;
@ -346,7 +351,7 @@ final class Parser
// Process directives. // Process directives.
while(scanner_.checkToken(TokenID.Directive)) while(scanner_.checkToken(TokenID.Directive))
{ {
immutable token = scanner_.getToken(); const token = scanner_.getToken();
const value = token.value; const value = token.value;
if(token.directive == DirectiveType.YAML) if(token.directive == DirectiveType.YAML)
{ {
@ -356,11 +361,11 @@ final class Parser
enforce(minor == "1", enforce(minor == "1",
new Error("Incompatible document (version 1.x is required)", new Error("Incompatible document (version 1.x is required)",
token.startMark)); token.startMark));
YAMLVersion_ = value; YAMLVersion_ = cast(string)value;
} }
else if(token.directive == DirectiveType.TAG) else if(token.directive == DirectiveType.TAG)
{ {
auto handle = value[0 .. token.valueDivider]; auto handle = cast(string)value[0 .. token.valueDivider];
foreach(ref pair; tagDirectives_) foreach(ref pair; tagDirectives_)
{ {
@ -369,7 +374,8 @@ final class Parser
enforce(h != handle, new Error("Duplicate tag handle: " ~ handle, enforce(h != handle, new Error("Duplicate tag handle: " ~ handle,
token.startMark)); token.startMark));
} }
tagDirectives_ ~= TagDirective(handle, value[token.valueDivider .. $]); tagDirectives_ ~=
TagDirective(handle, cast(string)value[token.valueDivider .. $]);
} }
// Any other directive type is ignored (only YAML and TAG are in YAML // Any other directive type is ignored (only YAML and TAG are in YAML
// 1.1/1.2, any other directives are "reserved") // 1.1/1.2, any other directives are "reserved")
@ -417,9 +423,10 @@ final class Parser
{ {
if(scanner_.checkToken(TokenID.Alias)) if(scanner_.checkToken(TokenID.Alias))
{ {
immutable token = scanner_.getToken(); const token = scanner_.getToken();
state_ = popState(); state_ = popState();
return aliasEvent(token.startMark, token.endMark, Anchor(token.value)); return aliasEvent(token.startMark, token.endMark,
Anchor(cast(string)token.value));
} }
string anchor = null; string anchor = null;
@ -434,7 +441,7 @@ final class Parser
{ {
if(!scanner_.checkToken(id)){return false;} if(!scanner_.checkToken(id)){return false;}
invalidMarks = false; invalidMarks = false;
immutable token = scanner_.getToken(); const token = scanner_.getToken();
if(first){startMark = token.startMark;} if(first){startMark = token.startMark;}
if(id == TokenID.Tag) if(id == TokenID.Tag)
{ {
@ -442,7 +449,7 @@ final class Parser
tagHandleEnd = token.valueDivider; tagHandleEnd = token.valueDivider;
} }
endMark = token.endMark; endMark = token.endMark;
target = token.value; target = cast(string)token.value;
return true; return true;
} }
@ -469,10 +476,10 @@ final class Parser
if(scanner_.checkToken(TokenID.Scalar)) if(scanner_.checkToken(TokenID.Scalar))
{ {
immutable token = scanner_.getToken(); auto token = scanner_.getToken();
auto value = token.style == ScalarStyle.DoubleQuoted auto value = token.style == ScalarStyle.DoubleQuoted
? handleDoubleQuotedScalarEscapes(token.value) ? handleDoubleQuotedScalarEscapes(token.value)
: token.value; : cast(string)token.value;
implicit = (token.style == ScalarStyle.Plain && tag is null) || tag == "!"; implicit = (token.style == ScalarStyle.Plain && tag is null) || tag == "!";
bool implicit_2 = (!implicit) && tag is null; bool implicit_2 = (!implicit) && tag is null;
@ -525,7 +532,7 @@ final class Parser
tuple(implicit, false) , ""); tuple(implicit, false) , "");
} }
immutable token = scanner_.peekToken(); const token = scanner_.peekToken();
throw new Error("While parsing a " ~ (block ? "block" : "flow") ~ " node", throw new Error("While parsing a " ~ (block ? "block" : "flow") ~ " node",
startMark, "expected node content, but found: " startMark, "expected node content, but found: "
~ token.idString, token.startMark); ~ token.idString, token.startMark);
@ -534,13 +541,13 @@ final class Parser
/// Handle escape sequences in a double quoted scalar. /// Handle escape sequences in a double quoted scalar.
/// ///
/// Moved here from scanner as it can't always be done in-place with slices. /// Moved here from scanner as it can't always be done in-place with slices.
string handleDoubleQuotedScalarEscapes(string tokenValue) string handleDoubleQuotedScalarEscapes(char[] tokenValue)
{ {
string notInPlace; string notInPlace;
bool inEscape = false; bool inEscape = false;
import dyaml.nogcutil; import dyaml.nogcutil;
auto appender = appenderNoGC(cast(char[])tokenValue); auto appender = appenderNoGC(cast(char[])tokenValue);
for(string oldValue = tokenValue; !oldValue.empty();) for(char[] oldValue = tokenValue; !oldValue.empty();)
{ {
const dchar c = oldValue.front(); const dchar c = oldValue.front();
oldValue.popFront(); oldValue.popFront();
@ -590,7 +597,7 @@ final class Parser
const hexLength = dyaml.escapes.escapeHexLength(c); const hexLength = dyaml.escapes.escapeHexLength(c);
// Any hex digits are 1-byte so this works. // Any hex digits are 1-byte so this works.
string hex = oldValue[0 .. hexLength]; char[] hex = oldValue[0 .. hexLength];
oldValue = oldValue[hexLength .. $]; oldValue = oldValue[hexLength .. $];
assert(!hex.canFind!(d => !d.isHexDigit), assert(!hex.canFind!(d => !d.isHexDigit),
"Scanner must ensure the hex string is valid"); "Scanner must ensure the hex string is valid");
@ -659,7 +666,7 @@ final class Parser
if(scanner_.checkToken(TokenID.BlockEntry)) if(scanner_.checkToken(TokenID.BlockEntry))
{ {
immutable token = scanner_.getToken(); const token = scanner_.getToken();
if(!scanner_.checkToken(TokenID.BlockEntry, TokenID.BlockEnd)) if(!scanner_.checkToken(TokenID.BlockEntry, TokenID.BlockEnd))
{ {
states_~= &parseBlockSequenceEntry!(No.first); states_~= &parseBlockSequenceEntry!(No.first);
@ -672,7 +679,7 @@ final class Parser
if(!scanner_.checkToken(TokenID.BlockEnd)) if(!scanner_.checkToken(TokenID.BlockEnd))
{ {
immutable token = scanner_.peekToken(); const token = scanner_.peekToken();
throw new Error("While parsing a block collection", marks_.back, throw new Error("While parsing a block collection", marks_.back,
"expected block end, but found " ~ token.idString, "expected block end, but found " ~ token.idString,
token.startMark); token.startMark);
@ -680,7 +687,7 @@ final class Parser
state_ = popState(); state_ = popState();
popMark(); popMark();
immutable token = scanner_.getToken(); const token = scanner_.getToken();
return sequenceEndEvent(token.startMark, token.endMark); return sequenceEndEvent(token.startMark, token.endMark);
} }
@ -691,7 +698,7 @@ final class Parser
{ {
if(scanner_.checkToken(TokenID.BlockEntry)) if(scanner_.checkToken(TokenID.BlockEntry))
{ {
immutable token = scanner_.getToken(); const token = scanner_.getToken();
if(!scanner_.checkToken(TokenID.BlockEntry, TokenID.Key, if(!scanner_.checkToken(TokenID.BlockEntry, TokenID.Key,
TokenID.Value, TokenID.BlockEnd)) TokenID.Value, TokenID.BlockEnd))
@ -705,7 +712,7 @@ final class Parser
} }
state_ = popState(); state_ = popState();
immutable token = scanner_.peekToken(); const token = scanner_.peekToken();
return sequenceEndEvent(token.startMark, token.endMark); return sequenceEndEvent(token.startMark, token.endMark);
} }
@ -723,7 +730,7 @@ final class Parser
if(scanner_.checkToken(TokenID.Key)) if(scanner_.checkToken(TokenID.Key))
{ {
immutable token = scanner_.getToken(); const token = scanner_.getToken();
if(!scanner_.checkToken(TokenID.Key, TokenID.Value, TokenID.BlockEnd)) if(!scanner_.checkToken(TokenID.Key, TokenID.Value, TokenID.BlockEnd))
{ {
@ -737,7 +744,7 @@ final class Parser
if(!scanner_.checkToken(TokenID.BlockEnd)) if(!scanner_.checkToken(TokenID.BlockEnd))
{ {
immutable token = scanner_.peekToken(); const token = scanner_.peekToken();
throw new Error("While parsing a block mapping", marks_.back, throw new Error("While parsing a block mapping", marks_.back,
"expected block end, but found: " ~ token.idString, "expected block end, but found: " ~ token.idString,
token.startMark); token.startMark);
@ -745,7 +752,7 @@ final class Parser
state_ = popState(); state_ = popState();
popMark(); popMark();
immutable token = scanner_.getToken(); const token = scanner_.getToken();
return mappingEndEvent(token.startMark, token.endMark); return mappingEndEvent(token.startMark, token.endMark);
} }
@ -754,7 +761,7 @@ final class Parser
{ {
if(scanner_.checkToken(TokenID.Value)) if(scanner_.checkToken(TokenID.Value))
{ {
immutable token = scanner_.getToken(); const token = scanner_.getToken();
if(!scanner_.checkToken(TokenID.Key, TokenID.Value, TokenID.BlockEnd)) if(!scanner_.checkToken(TokenID.Key, TokenID.Value, TokenID.BlockEnd))
{ {
@ -798,7 +805,7 @@ final class Parser
} }
else else
{ {
immutable token = scanner_.peekToken(); const token = scanner_.peekToken();
throw new Error("While parsing a flow sequence", marks_.back, throw new Error("While parsing a flow sequence", marks_.back,
"expected ',' or ']', but got: " ~ "expected ',' or ']', but got: " ~
token.idString, token.startMark); token.idString, token.startMark);
@ -807,7 +814,7 @@ final class Parser
if(scanner_.checkToken(TokenID.Key)) if(scanner_.checkToken(TokenID.Key))
{ {
immutable token = scanner_.peekToken(); const token = scanner_.peekToken();
state_ = &parseFlowSequenceEntryMappingKey; state_ = &parseFlowSequenceEntryMappingKey;
return mappingStartEvent(token.startMark, token.endMark, return mappingStartEvent(token.startMark, token.endMark,
Anchor(), Tag(), true, CollectionStyle.Flow); Anchor(), Tag(), true, CollectionStyle.Flow);
@ -819,7 +826,7 @@ final class Parser
} }
} }
immutable token = scanner_.getToken(); const token = scanner_.getToken();
state_ = popState(); state_ = popState();
popMark(); popMark();
return sequenceEndEvent(token.startMark, token.endMark); return sequenceEndEvent(token.startMark, token.endMark);
@ -828,7 +835,7 @@ final class Parser
///Parse a key in flow context. ///Parse a key in flow context.
Event parseFlowKey(in Event delegate() nextState) @trusted Event parseFlowKey(in Event delegate() nextState) @trusted
{ {
immutable token = scanner_.getToken(); const token = scanner_.getToken();
if(!scanner_.checkToken(TokenID.Value, TokenID.FlowEntry, if(!scanner_.checkToken(TokenID.Value, TokenID.FlowEntry,
TokenID.FlowSequenceEnd)) TokenID.FlowSequenceEnd))
@ -853,7 +860,7 @@ final class Parser
{ {
if(scanner_.checkToken(TokenID.Value)) if(scanner_.checkToken(TokenID.Value))
{ {
immutable token = scanner_.getToken(); const token = scanner_.getToken();
if(!scanner_.checkToken(TokenID.FlowEntry, checkId)) if(!scanner_.checkToken(TokenID.FlowEntry, checkId))
{ {
states_ ~= nextState; states_ ~= nextState;
@ -879,7 +886,7 @@ final class Parser
Event parseFlowSequenceEntryMappingEnd() @safe Event parseFlowSequenceEntryMappingEnd() @safe
{ {
state_ = &parseFlowSequenceEntry!(No.first); state_ = &parseFlowSequenceEntry!(No.first);
immutable token = scanner_.peekToken(); const token = scanner_.peekToken();
return mappingEndEvent(token.startMark, token.startMark); return mappingEndEvent(token.startMark, token.startMark);
} }
@ -906,7 +913,7 @@ final class Parser
} }
else else
{ {
immutable token = scanner_.peekToken(); const token = scanner_.peekToken();
throw new Error("While parsing a flow mapping", marks_.back, throw new Error("While parsing a flow mapping", marks_.back,
"expected ',' or '}', but got: " ~ "expected ',' or '}', but got: " ~
token.idString, token.startMark); token.idString, token.startMark);
@ -925,7 +932,7 @@ final class Parser
} }
} }
immutable token = scanner_.getToken(); const token = scanner_.getToken();
state_ = popState(); state_ = popState();
popMark(); popMark();
return mappingEndEvent(token.startMark, token.endMark); return mappingEndEvent(token.startMark, token.endMark);

View file

@ -472,7 +472,7 @@ final class Scanner
removePossibleSimpleKey(); removePossibleSimpleKey();
allowSimpleKey_ = false; allowSimpleKey_ = false;
const directive = scanDirective(); auto directive = scanDirective();
throwIfError(); throwIfError();
tokens_.push(directive); tokens_.push(directive);
} }
@ -656,7 +656,7 @@ final class Scanner
// No simple keys after ALIAS/ANCHOR. // No simple keys after ALIAS/ANCHOR.
allowSimpleKey_ = false; allowSimpleKey_ = false;
const anchor = scanAnchor(id); auto anchor = scanAnchor(id);
throwIfError(); throwIfError();
tokens_.push(anchor); tokens_.push(anchor);
} }
@ -704,7 +704,7 @@ final class Scanner
allowSimpleKey_ = false; allowSimpleKey_ = false;
// Scan and add SCALAR. // Scan and add SCALAR.
const scalar = scanFlowScalar(quotes); auto scalar = scanFlowScalar(quotes);
throwIfError(); throwIfError();
tokens_.push(scalar); tokens_.push(scalar);
} }
@ -721,7 +721,7 @@ final class Scanner
// No simple keys after plain scalars. But note that scanPlain() will // No simple keys after plain scalars. But note that scanPlain() will
// change this flag if the scan is finished at the beginning of the line. // change this flag if the scan is finished at the beginning of the line.
allowSimpleKey_ = false; allowSimpleKey_ = false;
const plain = scanPlain(); auto plain = scanPlain();
throwIfError(); throwIfError();
// Scan and add SCALAR. May change allowSimpleKey_ // Scan and add SCALAR. May change allowSimpleKey_
@ -915,7 +915,7 @@ final class Scanner
if(name == "YAML") { scanYAMLDirectiveValueToSlice(startMark); } if(name == "YAML") { scanYAMLDirectiveValueToSlice(startMark); }
else if(name == "TAG") { tagHandleEnd = scanTagDirectiveValueToSlice(startMark); } else if(name == "TAG") { tagHandleEnd = scanTagDirectiveValueToSlice(startMark); }
if(error_) { return Token.init; } if(error_) { return Token.init; }
const value = reader_.sliceBuilder.finish(); char[] value = reader_.sliceBuilder.finish();
Mark endMark = reader_.mark; Mark endMark = reader_.mark;
@ -1099,7 +1099,7 @@ final class Scanner
if(i == '*') { scanAlphaNumericToSlice!"an alias"(startMark); } if(i == '*') { scanAlphaNumericToSlice!"an alias"(startMark); }
else { scanAlphaNumericToSlice!"an anchor"(startMark); } else { scanAlphaNumericToSlice!"an anchor"(startMark); }
// On error, value is discarded as we return immediately // On error, value is discarded as we return immediately
const value = reader_.sliceBuilder.finish(); char[] value = reader_.sliceBuilder.finish();
if(error_) { return Token.init; } if(error_) { return Token.init; }
if(!" \t\0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek()) && if(!" \t\0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek()) &&
@ -1193,7 +1193,7 @@ final class Scanner
if(" \0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek())) if(" \0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek()))
{ {
const slice = reader_.sliceBuilder.finish(); char[] slice = reader_.sliceBuilder.finish();
return tagToken(startMark, reader_.mark, slice, handleEnd); return tagToken(startMark, reader_.mark, slice, handleEnd);
} }
@ -1340,7 +1340,7 @@ final class Scanner
} }
} }
const slice = reader_.sliceBuilder.finish(); char[] slice = reader_.sliceBuilder.finish();
return scalarToken(startMark, endMark, slice, style); return scalarToken(startMark, endMark, slice, style);
} }
@ -1780,7 +1780,7 @@ final class Scanner
} }
spacesTransaction.__dtor(); spacesTransaction.__dtor();
const slice = reader_.sliceBuilder.finish(); char[] slice = reader_.sliceBuilder.finish();
return scalarToken(startMark, endMark, slice, ScalarStyle.Plain); return scalarToken(startMark, endMark, slice, ScalarStyle.Plain);
} }
@ -2055,4 +2055,3 @@ string utf32To8(C)(C[] str) @safe pure nothrow
catch(ConvException e) { assert(false, "Unexpected invalid UTF-32 string"); } catch(ConvException e) { assert(false, "Unexpected invalid UTF-32 string"); }
catch(Exception e) { assert(false, "Unexpected exception during UTF-8 encoding"); } catch(Exception e) { assert(false, "Unexpected exception during UTF-8 encoding"); }
} }

View file

@ -65,7 +65,10 @@ struct Token
// 16B // 16B
/// Value of the token, if any. /// Value of the token, if any.
string value; ///
/// Values are char[] instead of string, as Parser may still change them in a few
/// cases. Parser casts values to strings when producing Events.
char[] value;
// 4B // 4B
/// Start position of the token in file/stream. /// Start position of the token in file/stream.
Mark startMark; Mark startMark;
@ -102,7 +105,7 @@ static assert(Token.sizeof <= 32, "Token has unexpected size");
/// end = End position of the token. /// end = End position of the token.
/// value = Value of the token. /// value = Value of the token.
/// directive = Directive type (YAML or TAG in YAML 1.1). /// directive = Directive type (YAML or TAG in YAML 1.1).
Token directiveToken(const Mark start, const Mark end, const string value, Token directiveToken(const Mark start, const Mark end, char[] value,
DirectiveType directive, const uint nameEnd) DirectiveType directive, const uint nameEnd)
{ {
return Token(value, start, end, TokenID.Directive, ScalarStyle.init, Encoding.init, return Token(value, start, end, TokenID.Directive, ScalarStyle.init, Encoding.init,
@ -147,7 +150,7 @@ alias simpleToken!(TokenID.FlowEntry) flowEntryToken;
/// value = Value of the token. /// value = Value of the token.
/// valueDivider = A hack for TagToken to store 2 values in value; the first /// valueDivider = A hack for TagToken to store 2 values in value; the first
/// value goes up to valueDivider, the second after it. /// value goes up to valueDivider, the second after it.
Token simpleValueToken(TokenID id)(const Mark start, const Mark end, const string value, Token simpleValueToken(TokenID id)(const Mark start, const Mark end, char[] value,
const uint valueDivider = uint.max) const uint valueDivider = uint.max)
{ {
return Token(value, start, end, id, ScalarStyle.Invalid, Encoding.init, return Token(value, start, end, id, ScalarStyle.Invalid, Encoding.init,
@ -165,7 +168,7 @@ alias simpleValueToken!(TokenID.Anchor) anchorToken;
/// end = End position of the token. /// end = End position of the token.
/// value = Value of the token. /// value = Value of the token.
/// style = Style of the token. /// style = Style of the token.
Token scalarToken(const Mark start, const Mark end, const string value, const ScalarStyle style) Token scalarToken(const Mark start, const Mark end, char[] value, const ScalarStyle style)
{ {
return Token(value, start, end, TokenID.Scalar, style); return Token(value, start, end, TokenID.Scalar, style);
} }