dyaml/source/dyaml/scanner.d

2080 lines
79 KiB
D
Raw Normal View History

2011-08-16 12:53:13 +00:00
2014-07-23 00:54:50 +00:00
// Copyright Ferdinand Majerech 2011-2014.
2011-08-16 12:53:13 +00:00
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
2014-07-26 21:29:55 +00:00
/// YAML scanner.
/// Code based on PyYAML: http://www.pyyaml.org
2011-08-16 12:53:13 +00:00
module dyaml.scanner;
import core.stdc.string;
import std.algorithm;
import std.array;
import std.container;
2011-08-16 12:53:13 +00:00
import std.conv;
import std.ascii : isAlphaNum, isDigit, isHexDigit;
import std.exception;
import std.string;
import std.typecons;
import std.traits : Unqual;
2011-08-16 12:53:13 +00:00
import dyaml.fastcharsearch;
import dyaml.escapes;
2011-08-16 12:53:13 +00:00
import dyaml.exception;
import dyaml.nogcutil;
import dyaml.queue;
2011-08-16 12:53:13 +00:00
import dyaml.reader;
import dyaml.style;
2011-08-16 12:53:13 +00:00
import dyaml.token;
package:
2014-07-26 21:29:55 +00:00
/// Scanner produces tokens of the following types:
/// STREAM-START
/// STREAM-END
/// DIRECTIVE(name, value)
/// DOCUMENT-START
/// DOCUMENT-END
/// BLOCK-SEQUENCE-START
/// BLOCK-MAPPING-START
/// BLOCK-END
/// FLOW-SEQUENCE-START
/// FLOW-MAPPING-START
/// FLOW-SEQUENCE-END
/// FLOW-MAPPING-END
/// BLOCK-ENTRY
/// FLOW-ENTRY
/// KEY
/// VALUE
/// ALIAS(value)
/// ANCHOR(value)
/// TAG(value)
/// SCALAR(value, plain, style)
/// Marked exception thrown at scanner errors.
///
/// See_Also: MarkedYAMLException
class ScannerException : MarkedYAMLException
{
    // Generates the constructors taking (msg, mark[, mark2]) from the template.
    mixin MarkedExceptionCtors;
}
2014-07-23 16:26:39 +00:00
/// Generates tokens from data provided by a Reader.
2011-08-16 12:53:13 +00:00
final class Scanner
{
private:
2014-07-23 16:26:39 +00:00
/// A simple key is a key that is not denoted by the '?' indicator.
/// For example:
///   ---
///   block simple key: value
///   ? not a simple key:
///   : { flow simple key: value }
/// We emit the KEY token before all keys, so when we find a potential simple
/// key, we try to locate the corresponding ':' indicator. Simple keys should be
/// limited to a single line and 1024 characters.
///
/// 16 bytes on 64-bit.
static struct SimpleKey
{
    /// Character index in reader where the key starts.
    /// uint.max (the default) marks a key that was never initialized.
    uint charIndex = uint.max;
    /// Index of the key token from start (first token scanned being 0).
    uint tokenIndex;
    /// Line the key starts at.
    uint line;
    /// Column the key starts at (clamped to ushort.max in savePossibleSimpleKey()).
    ushort column;
    /// Is this required to be a simple key?
    bool required;
    /// Is this struct "null" (invalid)?
    bool isNull;
}
2014-07-23 16:26:39 +00:00
/// Block chomping types (how trailing line breaks of a block scalar are handled).
enum Chomping
{
    /// Strip all trailing line breaks. '-' indicator.
    Strip,
    /// Line break of the last line is preserved, others discarded. Default.
    Clip,
    /// All trailing line breaks are preserved. '+' indicator.
    Keep
}
2014-07-23 16:26:39 +00:00
/// Reader used to read from a file/stream.
Reader reader_;
/// Are we done scanning?
bool done_;

/// Level of nesting in flow context. If 0, we're in block context.
uint flowLevel_;
/// Current indentation level.
int indent_ = -1;
/// Past indentation levels. Used as a stack.
Array!int indents_;

/// Processed tokens not yet emitted. Used as a queue.
Queue!Token tokens_;
/// Number of tokens emitted through the getToken method.
uint tokensTaken_;

/// Can a simple key start at the current position? A simple key may start:
/// - at the beginning of the line, not counting indentation spaces
///   (in block context),
/// - after '{', '[', ',' (in the flow context),
/// - after '?', ':', '-' (in the block context).
/// In the block context, this flag also signifies if a block collection
/// may start at the current position.
bool allowSimpleKey_ = true;

/// Possible simple keys indexed by flow levels.
SimpleKey[] possibleSimpleKeys_;

/// Set on error by nothrow/@nogc inner functions along with errorData_.
///
/// Non-nothrow/GC-using caller functions can then throw an exception using
/// data stored in errorData_.
bool error_;
/// Data for the exception to throw if error_ is true.
MarkedYAMLExceptionData errorData_;

/// Error messages can be built in this buffer without using the GC.
///
/// ScannerException (MarkedYAMLException) copies string data passed to its
/// constructor so it's safe to use slices of this buffer as parameters for
/// exceptions that may outlive the Scanner. The GC allocation when creating the
/// error message is removed, but the allocation when creating an exception is
/// not.
char[256] msgBuffer_;
2011-08-16 12:53:13 +00:00
public:
/// Construct a Scanner using specified Reader.
///
/// Immediately queues the STREAM-START token.
this(Reader reader) @safe nothrow
{
    reader_ = reader;
    fetchStreamStart();
}

/// Destroy the scanner, releasing internal containers and the Reader reference.
@trusted ~this()
{
    tokens_.destroy();
    indents_.destroy();
    possibleSimpleKeys_.destroy();
    possibleSimpleKeys_ = null;
    reader_ = null;
}
2014-07-26 21:29:55 +00:00
/// Check if the next token is one of specified types.
///
/// If no types are specified, checks if any tokens are left.
///
/// Params: ids = Token IDs to check for.
///
/// Returns: true if the next token is one of specified types, or if there are
///          any tokens left if no types specified, false otherwise.
bool checkToken(const TokenID[] ids ...) @safe
{
    // Make sure the queue holds everything decidable at this point.
    while(needMoreTokens()) { fetchToken(); }

    if(tokens_.empty) { return false; }
    // With no IDs given, any remaining token is a match.
    if(ids.length == 0) { return true; }

    const frontId = tokens_.peek().id;
    foreach(candidate; ids)
    {
        if(candidate == frontId) { return true; }
    }
    return false;
}
2014-07-26 21:29:55 +00:00
/// Return the next token, but keep it in the queue.
///
/// Must not be called if there are no tokens left.
ref const(Token) peekToken() @safe
{
    while(needMoreTokens) { fetchToken(); }
    // assert(false) is kept even in release builds, preserving the hard stop.
    if(tokens_.empty) { assert(false, "No token left to peek"); }
    return tokens_.peek();
}
2014-07-26 21:29:55 +00:00
/// Return the next token, removing it from the queue.
///
/// Must not be called if there are no tokens left.
Token getToken() @safe
{
    while(needMoreTokens) { fetchToken(); }
    // assert(false) is kept even in release builds, preserving the hard stop.
    if(tokens_.empty) { assert(false, "No token left to get"); }

    ++tokensTaken_;
    return tokens_.pop();
}
private:
/// Build an error message in msgBuffer_ and return it as a string.
string buildMsg(S ...)(S args) @trusted pure nothrow @nogc
{
    // printNoGC formats into the fixed-size buffer; the returned slice
    // aliases msgBuffer_ (safe to pass to ScannerException, which copies).
    return cast(string)msgBuffer_.printNoGC(args);
}

/// Most scanning error messages have the same format; so build them with this
/// function.
string expected(T)(string expected, T found) @safe pure nothrow @nogc
{
    return buildMsg("expected ", expected, ", but found ", found);
}

/// If error_ is true, throws a ScannerException constructed from errorData_ and
/// sets error_ to false.
void throwIfError() @safe pure
{
    if(!error_) { return; }
    error_ = false;
    throw new ScannerException(errorData_);
}

/// Called by internal nothrow/@nogc methods to set an error to be thrown by
/// their callers.
///
/// Params: context     = Description of what was being scanned.
///         contextMark = Position where the surrounding construct started.
///         problem     = Description of the problem found.
///         problemMark = Position of the problem.
///
/// See_Also: dyaml.exception.MarkedYAMLException
void error(string context, const Mark contextMark, string problem,
           const Mark problemMark) @safe pure nothrow @nogc
{
    assert(error_ == false,
           "Setting an error when there already is a not yet thrown error");
    error_ = true;
    errorData_ = MarkedYAMLExceptionData(context, contextMark, problem, problemMark);
}
2014-07-26 21:29:55 +00:00
/// Determine whether or not we need to fetch more tokens before peeking/getting a token.
bool needMoreTokens() @safe pure
{
    if(done_) { return false; }
    if(!tokens_.empty)
    {
        // The queued token may still be a potential simple key; if the nearest
        // possible simple key is exactly the token we'd hand out next, we must
        // keep scanning to resolve it.
        stalePossibleSimpleKeys();
        return nextPossibleSimpleKey() == tokensTaken_;
    }
    return true;
}
2014-07-26 21:29:55 +00:00
/// Fetch a token, adding it to tokens_.
void fetchToken() @safe
{
    // Eat whitespaces and comments until we reach the next token.
    scanToNextToken();

    // Remove obsolete possible simple keys.
    stalePossibleSimpleKeys();

    // Compare current indentation and column. It may add some tokens
    // and decrease the current indentation level.
    unwindIndent(reader_.column);

    // Get the next character (one byte is enough to dispatch on).
    const dchar c = reader_.peekByte();

    // Fetch the token.
    if(c == '\0')            { return fetchStreamEnd(); }
    if(checkDirective())     { return fetchDirective(); }
    if(checkDocumentStart()) { return fetchDocumentStart(); }
    if(checkDocumentEnd())   { return fetchDocumentEnd(); }
    // Order of the following checks is NOT significant.
    switch(c)
    {
        case '[':  return fetchFlowSequenceStart();
        case '{':  return fetchFlowMappingStart();
        case ']':  return fetchFlowSequenceEnd();
        case '}':  return fetchFlowMappingEnd();
        case ',':  return fetchFlowEntry();
        case '!':  return fetchTag();
        case '\'': return fetchSingle();
        case '\"': return fetchDouble();
        case '*':  return fetchAlias();
        case '&':  return fetchAnchor();
        // '?', ':' and '-' may also start a plain scalar, hence goto default.
        case '?':  if(checkKey())        { return fetchKey(); }        goto default;
        case ':':  if(checkValue())      { return fetchValue(); }      goto default;
        case '-':  if(checkBlockEntry()) { return fetchBlockEntry(); } goto default;
        // Block scalars are only allowed in block context (flowLevel_ == 0).
        case '|':  if(flowLevel_ == 0)   { return fetchLiteral(); }    break;
        case '>':  if(flowLevel_ == 0)   { return fetchFolded(); }     break;
        default:   if(checkPlain())      { return fetchPlain(); }
    }

    throw new ScannerException("While scanning for the next token, found character "
                               "\'%s\', index %s that cannot start any token"
                               .format(c, to!int(c)), reader_.mark);
}
/// Return the token number of the nearest possible simple key.
///
/// Returns uint.max if no possible simple key is currently tracked.
uint nextPossibleSimpleKey() @safe pure nothrow @nogc
{
    uint nearest = uint.max;
    foreach(ref candidate; possibleSimpleKeys_)
    {
        if(!candidate.isNull && candidate.tokenIndex < nearest)
        {
            nearest = candidate.tokenIndex;
        }
    }
    return nearest;
}
2014-07-26 21:29:55 +00:00
/// Remove entries that are no longer possible simple keys.
///
/// According to the YAML specification, simple keys
/// - should be limited to a single line,
/// - should be no longer than 1024 characters.
/// Disabling this will allow simple keys of any length and
/// height (may cause problems if indentation is broken though).
void stalePossibleSimpleKeys() @safe pure
{
    foreach(level, ref key; possibleSimpleKeys_)
    {
        if(key.isNull) { continue; }
        // Stale once we've left the key's line or moved more than 1024
        // characters past its start.
        if(key.line != reader_.line || reader_.charIndex - key.charIndex > 1024)
        {
            // A stale key that was *required* to be simple is a syntax error.
            enforce(!key.required,
                    new ScannerException("While scanning a simple key",
                                         Mark(key.line, key.column),
                                         "could not find expected ':'", reader_.mark));
            key.isNull = true;
        }
    }
}
2014-07-26 21:29:55 +00:00
/// Check if the next token starts a possible simple key and if so, save its position.
///
/// This function is called for ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.
void savePossibleSimpleKey() @safe pure
{
    // Check if a simple key is required at the current position.
    const required = (flowLevel_ == 0 && indent_ == reader_.column);
    assert(allowSimpleKey_ || !required, "A simple key is required only if it is "
           "the first token in the current line. Therefore it is always allowed.");

    if(!allowSimpleKey_) { return; }

    // The next token might be a simple key, so save its number and position.
    removePossibleSimpleKey();
    const tokenCount = tokensTaken_ + cast(uint)tokens_.length;

    const line   = reader_.line;
    const column = reader_.column;
    // Column is clamped to ushort.max to fit SimpleKey's compact layout.
    const key    = SimpleKey(cast(uint)reader_.charIndex, tokenCount, line,
                             cast(ushort)min(column, ushort.max), required);

    // Grow the per-flow-level array if needed.
    if(possibleSimpleKeys_.length <= flowLevel_)
    {
        const oldLength = possibleSimpleKeys_.length;
        possibleSimpleKeys_.length = flowLevel_ + 1;
        // No need to initialize the last element, it's already done in the next line.
        possibleSimpleKeys_[oldLength .. flowLevel_] = SimpleKey.init;
    }
    possibleSimpleKeys_[flowLevel_] = key;
}
2014-07-26 21:29:55 +00:00
/// Remove the saved possible key position at the current flow level.
void removePossibleSimpleKey() @safe pure
{
    // Nothing tracked at this flow level yet.
    if(flowLevel_ >= possibleSimpleKeys_.length)    { return; }
    if(possibleSimpleKeys_[flowLevel_].isNull)      { return; }

    const key = possibleSimpleKeys_[flowLevel_];
    // A required key we are about to discard means the ':' never showed up.
    enforce(!key.required,
            new ScannerException("While scanning a simple key",
                                 Mark(key.line, key.column),
                                 "could not find expected ':'", reader_.mark));
    possibleSimpleKeys_[flowLevel_].isNull = true;
}
2014-07-26 21:29:55 +00:00
/// Decrease indentation, removing entries in indents_.
///
/// Params: column = Current column in the file/stream.
void unwindIndent(const int column) @trusted
{
    if(flowLevel_ > 0)
    {
        // In flow context, tokens should respect indentation.
        // The condition should be `indent >= column` according to the spec.
        // But this condition will prohibit intuitively correct
        // constructions such as
        // key : {
        // }
        // In the flow context, indentation is ignored. We make the scanner less
        // restrictive than what the specification requires.
        // if(pedantic_ && flowLevel_ > 0 && indent_ > column)
        // {
        //     throw new ScannerException("Invalid indentation or unclosed '[' or '{'",
        //                                reader_.mark)
        // }
        return;
    }

    // In block context, we may need to issue the BLOCK-END tokens.
    while(indent_ > column)
    {
        // Pop the previous indentation level off the indents_ stack.
        indent_ = indents_.back;
        indents_.length = indents_.length - 1;
        tokens_.push(blockEndToken(reader_.mark, reader_.mark));
    }
}
2014-07-26 21:29:55 +00:00
/// Increase indentation if needed.
///
/// Params: column = Current column in the file/stream.
///
/// Returns: true if the indentation was increased, false otherwise.
bool addIndent(int column) @trusted
{
    const increased = column > indent_;
    if(increased)
    {
        // Push the old level so unwindIndent() can restore it later.
        indents_ ~= indent_;
        indent_ = column;
    }
    return increased;
}
2014-07-26 21:29:55 +00:00
/// Add STREAM-START token.
void fetchStreamStart() @safe nothrow
{
    tokens_.push(streamStartToken(reader_.mark, reader_.mark, reader_.encoding));
}

/// Add STREAM-END token.
void fetchStreamEnd() @safe
{
    // Set indentation to -1.
    unwindIndent(-1);
    removePossibleSimpleKey();
    allowSimpleKey_ = false;
    possibleSimpleKeys_.destroy;

    tokens_.push(streamEndToken(reader_.mark, reader_.mark));
    done_ = true;
}

/// Add DIRECTIVE token.
void fetchDirective() @safe
{
    // Set indentation to -1.
    unwindIndent(-1);
    // Reset simple keys.
    removePossibleSimpleKey();
    allowSimpleKey_ = false;

    // Scan first; only queue the token once we know scanning succeeded.
    auto directive = scanDirective();
    throwIfError();
    tokens_.push(directive);
}
2014-07-26 21:29:55 +00:00
/// Add DOCUMENT-START or DOCUMENT-END token.
void fetchDocumentIndicator(TokenID id)() @safe
    if(id == TokenID.DocumentStart || id == TokenID.DocumentEnd)
{
    // Set indentation to -1.
    unwindIndent(-1);
    // Reset simple keys. Note that there can't be a block collection after '---'.
    removePossibleSimpleKey();
    allowSimpleKey_ = false;

    Mark startMark = reader_.mark;
    // Skip the 3-character indicator ("---" or "...").
    reader_.forward(3);
    tokens_.push(simpleToken!id(startMark, reader_.mark));
}

/// Aliases to add DOCUMENT-START or DOCUMENT-END token.
alias fetchDocumentIndicator!(TokenID.DocumentStart) fetchDocumentStart;
alias fetchDocumentIndicator!(TokenID.DocumentEnd) fetchDocumentEnd;

/// Add FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
void fetchFlowCollectionStart(TokenID id)() @trusted
{
    // '[' and '{' may start a simple key.
    savePossibleSimpleKey();
    // Simple keys are allowed after '[' and '{'.
    allowSimpleKey_ = true;
    // Entering flow context.
    ++flowLevel_;

    Mark startMark = reader_.mark;
    reader_.forward();
    tokens_.push(simpleToken!id(startMark, reader_.mark));
}

/// Aliases to add FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
alias fetchFlowCollectionStart!(TokenID.FlowSequenceStart) fetchFlowSequenceStart;
alias fetchFlowCollectionStart!(TokenID.FlowMappingStart) fetchFlowMappingStart;
2014-07-26 21:29:55 +00:00
/// Add FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
void fetchFlowCollectionEnd(TokenID id)() @safe
{
    // Reset possible simple key on the current level.
    removePossibleSimpleKey();
    // No simple keys after ']' and '}'.
    allowSimpleKey_ = false;
    // Leaving one level of flow context.
    --flowLevel_;

    Mark startMark = reader_.mark;
    reader_.forward();
    tokens_.push(simpleToken!id(startMark, reader_.mark));
}

/// Aliases to add FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
alias fetchFlowCollectionEnd!(TokenID.FlowSequenceEnd) fetchFlowSequenceEnd;
alias fetchFlowCollectionEnd!(TokenID.FlowMappingEnd) fetchFlowMappingEnd;
2014-07-26 21:29:55 +00:00
/// Add FLOW-ENTRY token.
void fetchFlowEntry() @safe
{
    // A possible simple key on this flow level is no longer valid after ','.
    removePossibleSimpleKey();
    // A simple key may follow ','.
    allowSimpleKey_ = true;

    const Mark start = reader_.mark;
    reader_.forward();
    tokens_.push(flowEntryToken(start, reader_.mark));
}
2014-07-26 21:29:55 +00:00
/// Additional checks used in block context in fetchBlockEntry and fetchKey.
///
/// Params: type = String representing the token type we might need to add.
///         id   = Token type we might need to add.
void blockChecks(string type, TokenID id)() @safe
{
    enum context = type ~ " keys are not allowed here";
    // Are we allowed to start a key (not necessarily a simple one)?
    enforce(allowSimpleKey_, new ScannerException(context, reader_.mark));

    if(addIndent(reader_.column))
    {
        tokens_.push(simpleToken!id(reader_.mark, reader_.mark));
    }
}

/// Add BLOCK-ENTRY token. Might add BLOCK-SEQUENCE-START in the process.
void fetchBlockEntry() @safe
{
    if(flowLevel_ == 0) { blockChecks!("Sequence", TokenID.BlockSequenceStart)(); }
    // It's an error for the block entry to occur in the flow context,
    // but we let the parser detect this.

    // Reset possible simple key on the current level.
    removePossibleSimpleKey();
    // Simple keys are allowed after '-'.
    allowSimpleKey_ = true;

    Mark startMark = reader_.mark;
    reader_.forward();
    tokens_.push(blockEntryToken(startMark, reader_.mark));
}

/// Add KEY token. Might add BLOCK-MAPPING-START in the process.
void fetchKey() @safe
{
    if(flowLevel_ == 0) { blockChecks!("Mapping", TokenID.BlockMappingStart)(); }

    // Reset possible simple key on the current level.
    removePossibleSimpleKey();
    // Simple keys are allowed after '?' in the block context.
    allowSimpleKey_ = (flowLevel_ == 0);

    Mark startMark = reader_.mark;
    reader_.forward();
    tokens_.push(keyToken(startMark, reader_.mark));
}
2014-07-26 21:29:55 +00:00
/// Add VALUE token. Might add KEY and/or BLOCK-MAPPING-START in the process.
void fetchValue() @safe
{
    // Do we determine a simple key?
    if(possibleSimpleKeys_.length > flowLevel_ &&
       !possibleSimpleKeys_[flowLevel_].isNull)
    {
        const key = possibleSimpleKeys_[flowLevel_];
        possibleSimpleKeys_[flowLevel_].isNull = true;
        Mark keyMark = Mark(key.line, key.column);
        // Offset of the KEY token within the not-yet-emitted queue.
        const idx = key.tokenIndex - tokensTaken_;

        assert(idx >= 0);

        // Add KEY.
        // Manually inserting since tokens are immutable (need linked list).
        tokens_.insert(keyToken(keyMark, keyMark), idx);

        // If this key starts a new block mapping, we need to add BLOCK-MAPPING-START.
        if(flowLevel_ == 0 && addIndent(key.column))
        {
            tokens_.insert(blockMappingStartToken(keyMark, keyMark), idx);
        }

        // There cannot be two simple keys in a row.
        allowSimpleKey_ = false;
    }
    // Part of a complex key
    else
    {
        // We can start a complex value if and only if we can start a simple key.
        enforce(flowLevel_ > 0 || allowSimpleKey_,
                new ScannerException("Mapping values are not allowed here", reader_.mark));

        // If this value starts a new block mapping, we need to add
        // BLOCK-MAPPING-START. It'll be detected as an error later by the parser.
        if(flowLevel_ == 0 && addIndent(reader_.column))
        {
            tokens_.push(blockMappingStartToken(reader_.mark, reader_.mark));
        }

        // Reset possible simple key on the current level.
        removePossibleSimpleKey();
        // Simple keys are allowed after ':' in the block context.
        allowSimpleKey_ = (flowLevel_ == 0);
    }

    // Add VALUE.
    Mark startMark = reader_.mark;
    reader_.forward();
    tokens_.push(valueToken(startMark, reader_.mark));
}
2014-07-26 21:29:55 +00:00
/// Add ALIAS or ANCHOR token.
void fetchAnchor_(TokenID id)() @trusted
    if(id == TokenID.Alias || id == TokenID.Anchor)
{
    // ALIAS/ANCHOR could be a simple key.
    savePossibleSimpleKey();
    // No simple keys after ALIAS/ANCHOR.
    allowSimpleKey_ = false;

    // Scan first; only queue the token once we know scanning succeeded.
    auto anchor = scanAnchor(id);
    throwIfError();
    tokens_.push(anchor);
}

/// Aliases to add ALIAS or ANCHOR token.
alias fetchAnchor_!(TokenID.Alias) fetchAlias;
alias fetchAnchor_!(TokenID.Anchor) fetchAnchor;
2014-07-26 21:29:55 +00:00
/// Add TAG token.
void fetchTag() @trusted
{
    // TAG could start a simple key.
    savePossibleSimpleKey();
    // No simple keys after TAG.
    allowSimpleKey_ = false;

    // Scan the tag first and only queue the token once scanning succeeded.
    // Previously the token was pushed before throwIfError(), so a scan error
    // left a bogus TAG token in tokens_ when the exception was thrown. This
    // ordering matches fetchDirective/fetchAnchor_/fetchBlockScalar/etc.
    auto tag = scanTag();
    throwIfError();
    tokens_.push(tag);
}
2014-07-26 21:29:55 +00:00
/// Add block SCALAR token.
void fetchBlockScalar(ScalarStyle style)() @trusted
    if(style == ScalarStyle.Literal || style == ScalarStyle.Folded)
{
    // Reset possible simple key on the current level.
    removePossibleSimpleKey();
    // A simple key may follow a block scalar.
    allowSimpleKey_ = true;

    // Scan first; only queue the token once we know scanning succeeded.
    auto blockScalar = scanBlockScalar(style);
    throwIfError();
    tokens_.push(blockScalar);
}

/// Aliases to add literal or folded block scalar.
alias fetchBlockScalar!(ScalarStyle.Literal) fetchLiteral;
alias fetchBlockScalar!(ScalarStyle.Folded) fetchFolded;

/// Add quoted flow SCALAR token.
void fetchFlowScalar(ScalarStyle quotes)() @safe
{
    // A flow scalar could be a simple key.
    savePossibleSimpleKey();
    // No simple keys after flow scalars.
    allowSimpleKey_ = false;

    // Scan and add SCALAR.
    auto scalar = scanFlowScalar(quotes);
    throwIfError();
    tokens_.push(scalar);
}

/// Aliases to add single or double quoted flow scalar.
alias fetchFlowScalar!(ScalarStyle.SingleQuoted) fetchSingle;
alias fetchFlowScalar!(ScalarStyle.DoubleQuoted) fetchDouble;

/// Add plain SCALAR token.
void fetchPlain() @safe
{
    // A plain scalar could be a simple key.
    savePossibleSimpleKey();
    // No simple keys after plain scalars. But note that scanPlain() will
    // change this flag if the scan is finished at the beginning of the line.
    allowSimpleKey_ = false;

    // Scan and add SCALAR. May change allowSimpleKey_.
    auto plain = scanPlain();
    throwIfError();
    tokens_.push(plain);
}
/// Check if the next token is DIRECTIVE: ^ '%' ...
bool checkDirective() @safe pure nothrow @nogc
{
    // A directive must start at the first column of a line.
    return reader_.column == 0 && reader_.peekByte() == '%';
}
2011-08-16 12:53:13 +00:00
2014-07-23 00:17:19 +00:00
/// Check if the next token is DOCUMENT-START: ^ '---' (' '|'\n')
bool checkDocumentStart() @safe pure nothrow @nogc
{
    // Check one char first, then all 3, to prevent reading outside the buffer.
    return reader_.column == 0 &&
           reader_.peekByte() == '-' &&
           reader_.prefix(3) == "---" &&
           " \t\0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek(3));
}

/// Check if the next token is DOCUMENT-END: ^ '...' (' '|'\n')
bool checkDocumentEnd() @safe pure nothrow @nogc
{
    // Check one char first, then all 3, to prevent reading outside the buffer.
    return reader_.column == 0 &&
           reader_.peekByte() == '.' &&
           reader_.prefix(3) == "..." &&
           " \t\0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek(3));
}
2014-07-29 21:08:37 +00:00
/// Check if the next token is BLOCK-ENTRY: '-' (' '|'\n')
bool checkBlockEntry() @safe pure nothrow @nogc
{
    // Use the same compile-time FastCharSearch as checkValue() for this exact
    // character set, instead of a linear canFind over a dstring literal
    // (consistency with the sibling check* methods, and O(1) per lookup).
    mixin FastCharSearch!" \t\0\n\r\u0085\u2028\u2029"d search;
    return search.canFind(reader_.peek(1));
}
2014-07-29 21:08:37 +00:00
/// Check if the next token is KEY(flow context): '?'
///
/// or KEY(block context): '?' (' '|'\n')
bool checkKey() @safe pure nothrow @nogc
{
    // In flow context '?' alone suffices; in block context it must be
    // followed by whitespace or a line break.
    return (flowLevel_ > 0 ||
            " \t\0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek(1)));
}

/// Check if the next token is VALUE(flow context): ':'
///
/// or VALUE(block context): ':' (' '|'\n')
bool checkValue() @safe pure nothrow @nogc
{
    mixin FastCharSearch!" \t\0\n\r\u0085\u2028\u2029"d search;
    return flowLevel_ > 0 || search.canFind(reader_.peek(1));
}
2014-07-29 21:08:37 +00:00
/// Check if the next token is a plain scalar.
///
/// A plain scalar may start with any non-space character except:
///   '-', '?', ':', ',', '[', ']', '{', '}',
///   '#', '&', '*', '!', '|', '>', '\'', '\"',
///   '%', '@', '`'.
///
/// It may also start with
///   '-', '?', ':'
/// if it is followed by a non-space character.
///
/// Note that we limit the last rule to the block context (except the
/// '-' character) because we want the flow context to be space
/// independent.
bool checkPlain() @safe pure nothrow @nogc
{
    const c = reader_.peek();
    mixin FastCharSearch!"-?:,[]{}#&*!|>\'\"%@` \t\0\n\r\u0085\u2028\u2029"d
        searchPlainNotFirstChar;
    // Any character outside the indicator/whitespace set starts a plain scalar.
    if(!searchPlainNotFirstChar.canFind(c))
    {
        return true;
    }
    mixin FastCharSearch!" \t\0\n\r\u0085\u2028\u2029"d searchAllWhitespace;
    // '-' (any context), or '?'/':' (block context only), also start a plain
    // scalar when the following character is not whitespace.
    return !searchAllWhitespace.canFind(reader_.peek(1)) &&
           (c == '-' || (flowLevel_ == 0 && (c == '?' || c == ':')));
}
/// Move to the next non-space character.
void findNextNonSpace() @safe pure nothrow @nogc
{
    for(;;)
    {
        if(reader_.peekByte() != ' ') { break; }
        reader_.forward();
    }
}
/// Scan a string of alphanumeric or "-_" characters.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
void scanAlphaNumericToSlice(string name)(const Mark startMark)
    @system pure nothrow @nogc
{
    size_t length = 0;
    dchar c = reader_.peek();
    // Advance past every character that is alphanumeric, '-' or '_'.
    while(c.isAlphaNum || "-_"d.canFind(c)) { c = reader_.peek(++length); }

    // At least one valid character is required.
    if(length == 0)
    {
        enum contextMsg = "While scanning " ~ name;
        error(contextMsg, startMark, expected("alphanumeric, '-' or '_'", c),
              reader_.mark);
        return;
    }

    reader_.sliceBuilder.write(reader_.get(length));
}
2011-08-16 12:53:13 +00:00
/// Scan and throw away all characters until next line break.
void scanToNextBreak() @safe pure nothrow @nogc
2011-08-16 12:53:13 +00:00
{
mixin FastCharSearch!"\0\n\r\u0085\u2028\u2029"d search;
while(!search.canFind(reader_.peek())) { reader_.forward(); }
2011-08-16 12:53:13 +00:00
}
/// Scan all characters until next line break.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
void scanToNextBreakToSlice() @system pure nothrow @nogc
{
uint length = 0;
while(!"\0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek(length)))
{
++length;
}
reader_.sliceBuilder.write(reader_.get(length));
}
2014-07-26 01:16:01 +00:00
/// Move to next token in the file/stream.
///
/// We ignore spaces, line breaks and comments.
/// If we find a line break in the block context, we set
/// allowSimpleKey` on.
///
/// We do not yet support BOM inside the stream as the
/// specification requires. Any such mark will be considered as a part
/// of the document.
2014-07-24 01:50:39 +00:00
void scanToNextToken() @safe pure nothrow @nogc
2011-08-16 12:53:13 +00:00
{
2014-07-26 01:16:01 +00:00
// TODO(PyYAML): We need to make tab handling rules more sane. A good rule is:
// Tabs cannot precede tokens
// BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END,
// KEY(block), VALUE(block), BLOCK-ENTRY
// So the checking code is
// if <TAB>:
// allowSimpleKey_ = false
// We also need to add the check for `allowSimpleKey_ == true` to
// `unwindIndent` before issuing BLOCK-END.
// Scanners for block, flow, and plain scalars need to be modified.
2011-08-16 12:53:13 +00:00
for(;;)
{
findNextNonSpace();
if(reader_.peekByte() == '#') { scanToNextBreak(); }
if(scanLineBreak() != '\0')
2011-08-16 12:53:13 +00:00
{
2014-07-26 01:16:01 +00:00
if(flowLevel_ == 0) { allowSimpleKey_ = true; }
}
else
{
break;
2011-08-16 12:53:13 +00:00
}
}
}
/// Scan directive token.
Token scanDirective() @trusted pure nothrow
2011-08-16 12:53:13 +00:00
{
Mark startMark = reader_.mark;
2014-07-26 14:43:02 +00:00
// Skip the '%'.
2011-08-16 12:53:13 +00:00
reader_.forward();
2014-07-26 15:37:20 +00:00
// Scan directive name
reader_.sliceBuilder.begin();
scanDirectiveNameToSlice(startMark);
if(error_) { return Token.init; }
const name = reader_.sliceBuilder.finish();
2014-07-26 15:37:20 +00:00
reader_.sliceBuilder.begin();
2014-07-26 15:37:20 +00:00
// Index where tag handle ends and suffix starts in a tag directive value.
uint tagHandleEnd = uint.max;
if(name == "YAML") { scanYAMLDirectiveValueToSlice(startMark); }
else if(name == "TAG") { tagHandleEnd = scanTagDirectiveValueToSlice(startMark); }
if(error_) { return Token.init; }
char[] value = reader_.sliceBuilder.finish();
2011-08-16 12:53:13 +00:00
Mark endMark = reader_.mark;
DirectiveType directive;
2014-07-29 02:10:16 +00:00
if(name == "YAML") { directive = DirectiveType.YAML; }
else if(name == "TAG") { directive = DirectiveType.TAG; }
else
{
directive = DirectiveType.Reserved;
scanToNextBreak();
}
scanDirectiveIgnoredLine(startMark);
if(error_) { return Token.init; }
2011-08-16 12:53:13 +00:00
2014-07-29 02:19:44 +00:00
return directiveToken(startMark, endMark, value, directive, tagHandleEnd);
2011-08-16 12:53:13 +00:00
}
/// Scan name of a directive token.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
void scanDirectiveNameToSlice(const Mark startMark) @system pure nothrow @nogc
2011-08-16 12:53:13 +00:00
{
2014-07-26 11:31:31 +00:00
// Scan directive name.
scanAlphaNumericToSlice!"a directive"(startMark);
if(error_) { return; }
2011-08-16 12:53:13 +00:00
if(" \0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek())) { return; }
error("While scanning a directive", startMark,
expected("alphanumeric, '-' or '_'", reader_.peek()), reader_.mark);
2011-08-16 12:53:13 +00:00
}
2014-07-26 15:37:20 +00:00
/// Scan value of a YAML directive token. Returns major, minor version separated by '.'.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
void scanYAMLDirectiveValueToSlice(const Mark startMark)
@system pure nothrow @nogc
2011-08-16 12:53:13 +00:00
{
findNextNonSpace();
scanYAMLDirectiveNumberToSlice(startMark);
if(error_) { return; }
if(reader_.peekByte() != '.')
{
error("While scanning a directive", startMark,
expected("digit or '.'", reader_.peek()), reader_.mark);
return;
}
// Skip the '.'.
2011-08-16 12:53:13 +00:00
reader_.forward();
reader_.sliceBuilder.write('.');
scanYAMLDirectiveNumberToSlice(startMark);
if(error_) { return; }
if(!" \0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek()))
{
error("While scanning a directive", startMark,
expected("digit or '.'", reader_.peek()), reader_.mark);
}
2011-08-16 12:53:13 +00:00
}
/// Scan a number from a YAML directive.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
void scanYAMLDirectiveNumberToSlice(const Mark startMark)
@system pure nothrow @nogc
2011-08-16 12:53:13 +00:00
{
if(!isDigit(reader_.peek()))
{
error("While scanning a directive", startMark,
expected("digit", reader_.peek()), reader_.mark);
return;
}
2011-08-16 12:53:13 +00:00
2014-07-23 00:46:36 +00:00
// Already found the first digit in the enforce(), so set length to 1.
2011-08-16 12:53:13 +00:00
uint length = 1;
2014-08-05 16:21:29 +00:00
while(reader_.peek(length).isDigit) { ++length; }
2011-08-16 12:53:13 +00:00
reader_.sliceBuilder.write(reader_.get(length));
2011-08-16 12:53:13 +00:00
}
2014-07-25 00:32:29 +00:00
/// Scan value of a tag directive.
2014-07-26 15:37:20 +00:00
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
///
/// Returns: Length of tag handle (which is before tag prefix) in scanned data
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
uint scanTagDirectiveValueToSlice(const Mark startMark)
@system pure nothrow
2011-08-16 12:53:13 +00:00
{
findNextNonSpace();
const startLength = reader_.sliceBuilder.length;
scanTagDirectiveHandleToSlice(startMark);
if(error_) { return uint.max; }
const handleLength = cast(uint)(reader_.sliceBuilder.length - startLength);
2011-08-16 12:53:13 +00:00
findNextNonSpace();
scanTagDirectivePrefixToSlice(startMark);
return handleLength;
2011-08-16 12:53:13 +00:00
}
2014-07-26 15:37:20 +00:00
/// Scan handle of a tag directive.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
void scanTagDirectiveHandleToSlice(const Mark startMark)
@system pure nothrow @nogc
2011-08-16 12:53:13 +00:00
{
scanTagHandleToSlice!"directive"(startMark);
if(error_) { return; }
if(reader_.peekByte() == ' ') { return; }
error("While scanning a directive handle", startMark,
expected("' '", reader_.peek()), reader_.mark);
2011-08-16 12:53:13 +00:00
}
/// Scan prefix of a tag directive.
2014-07-26 15:37:20 +00:00
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
2014-08-01 23:19:29 +00:00
void scanTagDirectivePrefixToSlice(const Mark startMark)
@system pure nothrow @nogc
2011-08-16 12:53:13 +00:00
{
scanTagURIToSlice!"directive"(startMark);
if(" \0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek())) { return; }
error("While scanning a directive prefix", startMark,
expected("' '", reader_.peek()), reader_.mark);
2011-08-16 12:53:13 +00:00
}
2014-07-26 01:16:01 +00:00
/// Scan (and ignore) ignored line after a directive.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
void scanDirectiveIgnoredLine(const Mark startMark) @safe pure nothrow @nogc
2011-08-16 12:53:13 +00:00
{
findNextNonSpace();
if(reader_.peekByte() == '#') { scanToNextBreak(); }
if("\0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek()))
{
scanLineBreak();
return;
}
error("While scanning a directive", startMark,
expected("comment or a line break", reader_.peek()), reader_.mark);
2011-08-16 12:53:13 +00:00
}
/// Scan an alias or an anchor.
///
/// The specification does not restrict characters for anchors and
/// aliases. This may lead to problems, for instance, the document:
/// [ *alias, value ]
/// can be interpteted in two ways, as
/// [ "value" ]
/// and
/// [ *alias , "value" ]
/// Therefore we restrict aliases to ASCII alphanumeric characters.
2014-07-26 11:31:31 +00:00
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
2014-07-29 01:09:59 +00:00
Token scanAnchor(const TokenID id) @trusted pure nothrow @nogc
2011-08-16 12:53:13 +00:00
{
const startMark = reader_.mark;
const dchar i = reader_.get();
2011-08-16 12:53:13 +00:00
reader_.sliceBuilder.begin();
if(i == '*') { scanAlphaNumericToSlice!"an alias"(startMark); }
else { scanAlphaNumericToSlice!"an anchor"(startMark); }
2014-07-26 11:31:31 +00:00
// On error, value is discarded as we return immediately
char[] value = reader_.sliceBuilder.finish();
2014-07-26 11:31:31 +00:00
if(error_) { return Token.init; }
2011-08-16 12:53:13 +00:00
2014-07-26 11:31:31 +00:00
if(!" \t\0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek()) &&
!"?:,]}%@"d.canFind(reader_.peekByte()))
2014-07-26 11:31:31 +00:00
{
enum anchorCtx = "While scanning an anchor";
enum aliasCtx = "While scanning an alias";
error(i == '*' ? aliasCtx : anchorCtx, startMark,
expected("alphanumeric, '-' or '_'", reader_.peek()), reader_.mark);
return Token.init;
}
2011-08-16 12:53:13 +00:00
if(id == TokenID.Alias)
{
2014-07-29 01:09:59 +00:00
return aliasToken(startMark, reader_.mark, value);
2011-08-16 12:53:13 +00:00
}
2014-07-26 11:31:31 +00:00
if(id == TokenID.Anchor)
2011-08-16 12:53:13 +00:00
{
2014-07-29 01:09:59 +00:00
return anchorToken(startMark, reader_.mark, value);
2011-08-16 12:53:13 +00:00
}
assert(false, "This code should never be reached");
}
        /// Scan a tag token.
        ///
        /// A tag is either verbatim ('!<uri>'), non-specific (a lone '!'),
        /// or handle+suffix ('!handle!suffix' / '!suffix').
        ///
        /// In case of an error, error_ is set. Use throwIfError() to handle this.
        Token scanTag() @trusted pure nothrow @nogc
        {
            const startMark = reader_.mark;
            dchar c = reader_.peek(1);

            reader_.sliceBuilder.begin();
            scope(failure) { reader_.sliceBuilder.finish(); }
            // Index where tag handle ends and tag suffix starts in the tag value
            // (slice) we will produce.
            uint handleEnd;

            mixin FastCharSearch!" \0\n\r\u0085\u2028\u2029"d search;
            if(c == '<')
            {
                // Verbatim tag: '!<' URI '>'.
                reader_.forward(2);

                handleEnd = 0;
                scanTagURIToSlice!"tag"(startMark);
                if(error_) { return Token.init; }
                if(reader_.peekByte() != '>')
                {
                    error("While scanning a tag", startMark,
                          expected("'>'", reader_.peek()), reader_.mark);
                    return Token.init;
                }
                reader_.forward();
            }
            else if(" \t\0\n\r\u0085\u2028\u2029"d.canFind(c))
            {
                // A lone '!' - the non-specific tag.
                reader_.forward();
                handleEnd = 0;
                reader_.sliceBuilder.write('!');
            }
            else
            {
                // Look ahead for a second '!' to decide between
                // '!handle!suffix' and '!suffix'.
                uint length = 1;
                bool useHandle = false;

                while(!search.canFind(c))
                {
                    if(c == '!')
                    {
                        useHandle = true;
                        break;
                    }
                    ++length;
                    c = reader_.peek(length);
                }

                if(useHandle)
                {
                    scanTagHandleToSlice!"tag"(startMark);
                    handleEnd = cast(uint)reader_.sliceBuilder.length;
                    if(error_) { return Token.init; }
                }
                else
                {
                    // Primary handle is just '!'; everything after it is the suffix.
                    reader_.forward();
                    reader_.sliceBuilder.write('!');
                    handleEnd = cast(uint)reader_.sliceBuilder.length;
                }

                scanTagURIToSlice!"tag"(startMark);
                if(error_) { return Token.init; }
            }

            // The tag must be followed by whitespace or a line break / end of stream.
            if(search.canFind(reader_.peek()))
            {
                char[] slice = reader_.sliceBuilder.finish();
                return tagToken(startMark, reader_.mark, slice, handleEnd);
            }

            error("While scanning a tag", startMark, expected("' '", reader_.peek()),
                  reader_.mark);
            return Token.init;
        }
2014-07-26 00:29:27 +00:00
        /// Scan a block scalar token with specified style (literal '|' or folded '>').
        ///
        /// In case of an error, error_ is set. Use throwIfError() to handle this.
        Token scanBlockScalar(const ScalarStyle style) @trusted pure nothrow @nogc
        {
            const startMark = reader_.mark;

            // Scan the header ('|' or '>' plus optional chomping/indentation indicators).
            reader_.forward();

            const indicators = scanBlockScalarIndicators(startMark);
            if(error_) { return Token.init; }

            const chomping   = indicators[0];
            const increment  = indicators[1];
            scanBlockScalarIgnoredLine(startMark);
            if(error_) { return Token.init; }

            // Determine the indentation level and go to the first non-empty line.
            Mark endMark;
            uint indent = max(1, indent_ + 1);

            reader_.sliceBuilder.begin();
            alias Transaction = SliceBuilder.Transaction;
            // Used to strip the last line breaks written to the slice at the end of the
            // scalar, which may be needed based on chomping.
            Transaction breaksTransaction = Transaction(reader_.sliceBuilder);
            // Read the first indentation/line breaks before the scalar.
            size_t startLen = reader_.sliceBuilder.length;
            if(increment == int.min)
            {
                // No explicit indentation indicator: auto-detect from the content.
                auto indentation = scanBlockScalarIndentationToSlice();
                endMark = indentation[1];
                indent  = max(indent, indentation[0]);
            }
            else
            {
                indent += increment - 1;
                endMark = scanBlockScalarBreaksToSlice(indent);
            }

            // int.max means there's no line break (int.max is outside UTF-32).
            dchar lineBreak = cast(dchar)int.max;

            // Scan the inner part of the block scalar.
            while(reader_.column == indent && reader_.peekByte() != '\0')
            {
                breaksTransaction.commit();
                const bool leadingNonSpace = !" \t"d.canFind(reader_.peekByte());
                // This is where the 'interesting' non-whitespace data gets read.
                scanToNextBreakToSlice();
                lineBreak = scanLineBreak();

                // This transaction serves to rollback data read in the
                // scanBlockScalarBreaksToSlice() call.
                breaksTransaction = Transaction(reader_.sliceBuilder);
                startLen = reader_.sliceBuilder.length;
                // The line breaks should actually be written _after_ the if() block
                // below. We work around that by inserting
                endMark = scanBlockScalarBreaksToSlice(indent);

                // This will not run during the last iteration (see the if() vs the
                // while()), hence breaksTransaction rollback (which happens after this
                // loop) will never roll back data written in this if() block.
                if(reader_.column == indent && reader_.peekByte() != '\0')
                {
                    // Unfortunately, folding rules are ambiguous.

                    // This is the folding according to the specification:
                    if(style == ScalarStyle.Folded && lineBreak == '\n' &&
                       leadingNonSpace && !" \t"d.canFind(reader_.peekByte()))
                    {
                        // No breaks were scanned; no need to insert the space in the
                        // middle of slice.
                        if(startLen == reader_.sliceBuilder.length)
                        {
                            reader_.sliceBuilder.write(' ');
                        }
                    }
                    else
                    {
                        // We need to insert in the middle of the slice in case any line
                        // breaks were scanned.
                        reader_.sliceBuilder.insert(lineBreak, startLen);
                    }

                    ////this is Clark Evans's interpretation (also in the spec
                    ////examples):
                    //
                    //if(style == ScalarStyle.Folded && lineBreak == '\n')
                    //{
                    //    if(startLen == endLen)
                    //    {
                    //        if(!" \t"d.canFind(reader_.peekByte()))
                    //        {
                    //            reader_.sliceBuilder.write(' ');
                    //        }
                    //        else
                    //        {
                    //            chunks ~= lineBreak;
                    //        }
                    //    }
                    //}
                    //else
                    //{
                    //    reader_.sliceBuilder.insertBack(lineBreak, endLen - startLen);
                    //}
                }
                else
                {
                    break;
                }
            }

            // If chomping is Keep, we keep (commit) the last scanned line breaks
            // (which are at the end of the scalar). Otherwise we remove them (end the
            // transaction).
            if(chomping == Chomping.Keep)  { breaksTransaction.commit(); }
            else                           { breaksTransaction.__dtor(); }
            if(chomping != Chomping.Strip && lineBreak != int.max)
            {
                // If chomping is Keep, we keep the line break but the first line break
                // that isn't stripped (since chomping isn't Strip in this branch) must
                // be inserted _before_ the other line breaks.
                if(chomping == Chomping.Keep)
                {
                    reader_.sliceBuilder.insert(lineBreak, startLen);
                }
                // If chomping is not Keep, breaksTransaction was cancelled so we can
                // directly write the first line break (as it isn't stripped - chomping
                // is not Strip)
                else
                {
                    reader_.sliceBuilder.write(lineBreak);
                }
            }

            char[] slice = reader_.sliceBuilder.finish();
            return scalarToken(startMark, endMark, slice, style);
        }
/// Scan chomping and indentation indicators of a scalar token.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
2014-07-24 01:50:39 +00:00
Tuple!(Chomping, int) scanBlockScalarIndicators(const Mark startMark)
@safe pure nothrow @nogc
2011-08-16 12:53:13 +00:00
{
auto chomping = Chomping.Clip;
int increment = int.min;
dchar c = reader_.peek();
2011-08-16 12:53:13 +00:00
/// Indicators can be in any order.
if(getChomping(c, chomping))
2011-08-16 12:53:13 +00:00
{
getIncrement(c, increment, startMark);
if(error_) { return tuple(Chomping.init, int.max); }
}
else
{
const gotIncrement = getIncrement(c, increment, startMark);
if(error_) { return tuple(Chomping.init, int.max); }
if(gotIncrement) { getChomping(c, chomping); }
2011-08-16 12:53:13 +00:00
}
if(" \0\n\r\u0085\u2028\u2029"d.canFind(c))
2011-08-16 12:53:13 +00:00
{
return tuple(chomping, increment);
2011-08-16 12:53:13 +00:00
}
error("While scanning a block scalar", startMark,
expected("chomping or indentation indicator", c), reader_.mark);
return tuple(Chomping.init, int.max);
}
2011-08-16 12:53:13 +00:00
/// Get chomping indicator, if detected. Return false otherwise.
///
/// Used in scanBlockScalarIndicators.
///
/// Params:
///
/// c = The character that may be a chomping indicator.
/// chomping = Write the chomping value here, if detected.
bool getChomping(ref dchar c, ref Chomping chomping) @safe pure nothrow @nogc
{
if(!"+-"d.canFind(c)) { return false; }
chomping = c == '+' ? Chomping.Keep : Chomping.Strip;
reader_.forward();
c = reader_.peek();
return true;
}
2011-08-16 12:53:13 +00:00
/// Get increment indicator, if detected. Return false otherwise.
///
/// Used in scanBlockScalarIndicators.
///
/// Params:
///
/// c = The character that may be an increment indicator.
/// If an increment indicator is detected, this will be updated to
/// the next character in the Reader.
/// increment = Write the increment value here, if detected.
/// startMark = Mark for error messages.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
bool getIncrement(ref dchar c, ref int increment, const Mark startMark)
@safe pure nothrow @nogc
{
if(!c.isDigit) { return false; }
// Convert a digit to integer.
increment = c - '0';
assert(increment < 10 && increment >= 0, "Digit has invalid value");
if(increment > 0)
{
reader_.forward();
c = reader_.peek();
return true;
}
error("While scanning a block scalar", startMark,
expected("indentation indicator in range 1-9", "0"), reader_.mark);
return false;
2011-08-16 12:53:13 +00:00
}
/// Scan (and ignore) ignored line in a block scalar.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
void scanBlockScalarIgnoredLine(const Mark startMark) @safe pure nothrow @nogc
2011-08-16 12:53:13 +00:00
{
findNextNonSpace();
if(reader_.peekByte()== '#') { scanToNextBreak(); }
2011-08-16 12:53:13 +00:00
if("\0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek()))
{
scanLineBreak();
return;
}
error("While scanning a block scalar", startMark,
expected("comment or line break", reader_.peek()), reader_.mark);
2011-08-16 12:53:13 +00:00
}
/// Scan indentation in a block scalar, returning line breaks, max indent and end mark.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
Tuple!(uint, Mark) scanBlockScalarIndentationToSlice()
@system pure nothrow @nogc
2011-08-16 12:53:13 +00:00
{
uint maxIndent;
Mark endMark = reader_.mark;
while(" \n\r\u0085\u2028\u2029"d.canFind(reader_.peek()))
2011-08-16 12:53:13 +00:00
{
if(reader_.peekByte() != ' ')
2011-08-16 12:53:13 +00:00
{
reader_.sliceBuilder.write(scanLineBreak());
2011-08-16 12:53:13 +00:00
endMark = reader_.mark;
continue;
}
reader_.forward();
maxIndent = max(reader_.column, maxIndent);
}
return tuple(maxIndent, endMark);
2011-08-16 12:53:13 +00:00
}
/// Scan line breaks at lower or specified indentation in a block scalar.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
Mark scanBlockScalarBreaksToSlice(const uint indent) @trusted pure nothrow @nogc
{
Mark endMark = reader_.mark;
for(;;)
{
while(reader_.column < indent && reader_.peekByte() == ' ') { reader_.forward(); }
if(!"\n\r\u0085\u2028\u2029"d.canFind(reader_.peek())) { break; }
reader_.sliceBuilder.write(scanLineBreak());
endMark = reader_.mark;
}
return endMark;
}
2014-07-23 01:09:48 +00:00
/// Scan a qouted flow scalar token with specified quotes.
///
2014-07-25 14:54:06 +00:00
/// In case of an error, error_ is set. Use throwIfError() to handle this.
2014-07-29 21:08:03 +00:00
Token scanFlowScalar(const ScalarStyle quotes) @trusted pure nothrow @nogc
2011-08-16 12:53:13 +00:00
{
const startMark = reader_.mark;
2014-07-24 01:50:39 +00:00
const quote = reader_.get();
2011-08-16 12:53:13 +00:00
reader_.sliceBuilder.begin();
scope(exit) if(error_) { reader_.sliceBuilder.finish(); }
scanFlowScalarNonSpacesToSlice(quotes, startMark);
if(error_) { return Token.init; }
2011-08-16 12:53:13 +00:00
while(reader_.peek() != quote)
{
scanFlowScalarSpacesToSlice(startMark);
if(error_) { return Token.init; }
scanFlowScalarNonSpacesToSlice(quotes, startMark);
if(error_) { return Token.init; }
2011-08-16 12:53:13 +00:00
}
reader_.forward();
auto slice = reader_.sliceBuilder.finish();
return scalarToken(startMark, reader_.mark, slice, quotes);
2011-08-16 12:53:13 +00:00
}
        /// Scan nonspace characters in a flow scalar.
        ///
        /// Assumes that the caller is building a slice in Reader, and puts the scanned
        /// characters into that slice.
        ///
        /// In case of an error, error_ is set. Use throwIfError() to handle this.
        void scanFlowScalarNonSpacesToSlice(const ScalarStyle quotes, const Mark startMark)
            @system pure nothrow @nogc
        {
            for(;;) with(ScalarStyle)
            {
                dchar c = reader_.peek();

                mixin FastCharSearch!" \t\0\n\r\u0085\u2028\u2029\'\"\\"d search;

                size_t numCodePoints = 0;
                // This is an optimized way of writing:
                // while(!search.canFind(reader_.peek(numCodePoints))) { ++numCodePoints; }
                outer: for(size_t oldSliceLength;;)
                {
                    // This will not necessarily make slice 32 chars longer, as not all
                    // code points are 1 char.
                    const char[] slice = reader_.slice(numCodePoints + 32);
                    // No new data arrived: the input ended inside the scalar.
                    if(slice.length == oldSliceLength)
                    {
                        error("While reading a flow scalar", startMark,
                              "reached end of file", reader_.mark);
                        return;
                    }
                    for(size_t i = oldSliceLength; i < slice.length;)
                    {
                        // slice is UTF-8 - need to decode (ASCII fast path first)
                        const ch = slice[i] < 0x80 ? slice[i++] : decodeValidUTF8NoGC(slice, i);
                        if(search.canFind(ch)) { break outer; }
                        ++numCodePoints;
                    }
                    oldSliceLength = slice.length;
                }

                reader_.sliceBuilder.write(reader_.get(numCodePoints));

                c = reader_.peek();
                if(quotes == SingleQuoted && c == '\'' && reader_.peek(1) == '\'')
                {
                    // '' inside a single quoted scalar is an escaped single quote.
                    reader_.forward(2);
                    reader_.sliceBuilder.write('\'');
                }
                else if((quotes == DoubleQuoted && c == '\'') ||
                        (quotes == SingleQuoted && "\"\\"d.canFind(c)))
                {
                    // Quote/backslash characters with no special meaning in this style.
                    reader_.forward();
                    reader_.sliceBuilder.write(c);
                }
                else if(quotes == DoubleQuoted && c == '\\')
                {
                    reader_.forward();
                    c = reader_.peek();
                    if(dyaml.escapes.escapes.canFind(c))
                    {
                        reader_.forward();
                        // Escaping has been moved to Parser as it can't be done in
                        // place (in a slice) in case of '\P' and '\L' (very uncommon,
                        // but we don't want to break the spec)
                        char[2] escapeSequence = ['\\', cast(char)c];
                        reader_.sliceBuilder.write(escapeSequence);
                    }
                    else if(dyaml.escapes.escapeHexCodeList.canFind(c))
                    {
                        // \x, \u or \U escape - a fixed number of hex digits follows.
                        const hexLength = dyaml.escapes.escapeHexLength(c);
                        reader_.forward();

                        foreach(i; 0 .. hexLength) if(!reader_.peek(i).isHexDigit)
                        {
                            error("While scanning a double quoted scalar", startMark,
                                  expected("escape sequence of hexadecimal numbers",
                                           reader_.peek(i)), reader_.mark);
                            return;
                        }
                        char[] hex = reader_.get(hexLength);
                        char[2] escapeStart = ['\\', cast(char) c];
                        reader_.sliceBuilder.write(escapeStart);
                        reader_.sliceBuilder.write(hex);
                        bool overflow;
                        // Note: This is just error checking; Parser does the actual
                        // escaping (otherwise we could accidentally create an
                        // escape sequence here that wasn't in input, breaking the
                        // escaping code in parser, which is in parser because it
                        // can't always be done in place)
                        parseNoGC!int(hex, 16u, overflow);
                        if(overflow)
                        {
                            error("While scanning a double quoted scalar", startMark,
                                  "overflow when parsing an escape sequence of "
                                  "hexadecimal numbers.", reader_.mark);
                            return;
                        }
                    }
                    else if("\n\r\u0085\u2028\u2029"d.canFind(c))
                    {
                        // Escaped newline: consume it plus any following breaks.
                        scanLineBreak();
                        scanFlowScalarBreaksToSlice(startMark);
                        if(error_) { return; }
                    }
                    else
                    {
                        error("While scanning a double quoted scalar", startMark,
                              buildMsg("found unsupported escape character", c),
                              reader_.mark);
                        return;
                    }
                }
                else { return; }
            }
        }
2014-07-23 00:17:19 +00:00
/// Scan space characters in a flow scalar.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// spaces into that slice.
2014-07-24 21:20:31 +00:00
///
2014-07-25 14:54:06 +00:00
/// In case of an error, error_ is set. Use throwIfError() to handle this.
void scanFlowScalarSpacesToSlice(const Mark startMark)
@system pure nothrow @nogc
2011-08-16 12:53:13 +00:00
{
2014-07-23 00:17:19 +00:00
// Increase length as long as we see whitespace.
size_t length = 0;
2014-07-23 00:17:19 +00:00
while(" \t"d.canFind(reader_.peek(length))) { ++length; }
auto whitespaces = reader_.prefix(length);
2011-08-16 12:53:13 +00:00
// Can check the last byte without striding because '\0' is ASCII
const c = reader_.peek(length);
if(c == '\0')
{
error("While scanning a quoted scalar", startMark,
"found unexpected end of buffer", reader_.mark);
return;
}
2011-08-16 12:53:13 +00:00
// Spaces not followed by a line break.
if(!"\n\r\u0085\u2028\u2029"d.canFind(c))
{
reader_.forward(length);
reader_.sliceBuilder.write(whitespaces);
return;
}
// There's a line break after the spaces.
reader_.forward(length);
const lineBreak = scanLineBreak();
if(lineBreak != '\n') { reader_.sliceBuilder.write(lineBreak); }
// If we have extra line breaks after the first, scan them into the
// slice.
const bool extraBreaks = scanFlowScalarBreaksToSlice(startMark);
if(error_) { return; }
// No extra breaks, one normal line break. Replace it with a space.
if(lineBreak == '\n' && !extraBreaks) { reader_.sliceBuilder.write(' '); }
2011-08-16 12:53:13 +00:00
}
2014-07-23 00:17:19 +00:00
/// Scan line breaks in a flow scalar.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// line breaks into that slice.
///
2014-07-25 14:54:06 +00:00
/// In case of an error, error_ is set. Use throwIfError() to handle this.
bool scanFlowScalarBreaksToSlice(const Mark startMark)
@system pure nothrow @nogc
2011-08-16 12:53:13 +00:00
{
// True if at least one line break was found.
bool anyBreaks;
2011-08-16 12:53:13 +00:00
for(;;)
{
2014-07-23 00:17:19 +00:00
// Instead of checking indentation, we check for document separators.
const prefix = reader_.prefix(3);
if((prefix == "---" || prefix == "...") &&
" \t\0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek(3)))
2011-08-16 12:53:13 +00:00
{
error("While scanning a quoted scalar", startMark,
"found unexpected document separator", reader_.mark);
return false;
2011-08-16 12:53:13 +00:00
}
// Skip any whitespaces.
while(" \t"d.canFind(reader_.peekByte())) { reader_.forward(); }
2011-08-16 12:53:13 +00:00
// Encountered a non-whitespace non-linebreak character, so we're done.
if(!"\n\r\u0085\u2028\u2029"d.canFind(reader_.peek())) { break; }
const lineBreak = scanLineBreak();
anyBreaks = true;
reader_.sliceBuilder.write(lineBreak);
2011-08-16 12:53:13 +00:00
}
return anyBreaks;
2011-08-16 12:53:13 +00:00
}
        /// Scan plain scalar token (no block, no quotes).
        ///
        /// In case of an error, error_ is set. Use throwIfError() to handle this.
        Token scanPlain() @trusted pure nothrow @nogc
        {
            // We keep track of the allowSimpleKey_ flag here.
            // Indentation rules are loosened for the flow context.
            const startMark = reader_.mark;
            Mark endMark = startMark;
            const indent = indent_ + 1;

            // We allow zero indentation for scalars, but then we need to check for
            // document separators at the beginning of the line.
            // if(indent == 0) { indent = 1; }

            mixin FastCharSearch!" \t\0\n\r\u0085\u2028\u2029"d search;

            reader_.sliceBuilder.begin();

            alias Transaction = SliceBuilder.Transaction;
            // Rolls back trailing whitespace scanned after the last content run.
            Transaction spacesTransaction;
            // Stop at a comment.
            while(reader_.peekByte() != '#')
            {
                // Scan the entire plain scalar.
                uint length = 0;
                dchar c;
                for(;;)
                {
                    c = reader_.peek(length);
                    // A plain scalar run ends at whitespace, at ': ' in the block
                    // context, or at a flow indicator in the flow context.
                    const bool done = search.canFind(c) || (flowLevel_ == 0 && c == ':' &&
                                      search.canFind(reader_.peek(length + 1))) ||
                                      (flowLevel_ > 0 && ",:?[]{}"d.canFind(c));
                    if(done) { break; }
                    ++length;
                }

                // It's not clear what we should do with ':' in the flow context.
                if(flowLevel_ > 0 && c == ':' &&
                   !search.canFind(reader_.peek(length + 1)) &&
                   !",[]{}"d.canFind(reader_.peek(length + 1)))
                {
                    // This is an error; throw the slice away.
                    spacesTransaction.commit();
                    reader_.sliceBuilder.finish();
                    reader_.forward(length);
                    error("While scanning a plain scalar", startMark,
                          "found unexpected ':' . Please check "
                          "http://pyyaml.org/wiki/YAMLColonInFlowContext for details.",
                          reader_.mark);
                    return Token.init;
                }

                if(length == 0) { break; }

                allowSimpleKey_ = false;

                reader_.sliceBuilder.write(reader_.get(length));

                endMark = reader_.mark;

                spacesTransaction.commit();
                spacesTransaction = Transaction(reader_.sliceBuilder);

                const startLength = reader_.sliceBuilder.length;
                scanPlainSpacesToSlice(startMark);
                // Stop when no whitespace followed, or when the block-context
                // indentation dropped below the scalar's indent.
                if(startLength == reader_.sliceBuilder.length ||
                   (flowLevel_ == 0 && reader_.column < indent))
                {
                    break;
                }
            }

            spacesTransaction.__dtor();
            char[] slice = reader_.sliceBuilder.finish();

            return scalarToken(startMark, endMark, slice, ScalarStyle.Plain);
        }
2014-07-23 00:17:19 +00:00
    /// Scan spaces in a plain scalar.
    ///
    /// Assumes that the caller is building a slice in Reader, and puts the spaces
    /// into that slice.
    ///
    /// Params: startMark = Start position of the enclosing plain scalar.
    ///                     NOTE(review): not referenced in this body - kept for
    ///                     signature symmetry with the other scan*ToSlice methods.
    void scanPlainSpacesToSlice(const Mark startMark) @system pure nothrow @nogc
    {
        // The specification is really confusing about tabs in plain scalars.
        // We just forbid them completely. Do not use tabs in YAML!

        // Get as many plain spaces as there are.
        size_t length = 0;
        while(reader_.peekByte(length) == ' ') { ++length; }
        char[] whitespaces = reader_.get(length);

        dchar c = reader_.peek();
        mixin FastCharSearch!" \n\r\u0085\u2028\u2029"d search;
        // No newline after the spaces (if any)
        // (Excluding ' ' so we can use the same FastCharSearch as below)
        if(!search.canFind(c) && c != ' ')
        {
            // We have spaces, but no newline.
            if(whitespaces.length > 0) { reader_.sliceBuilder.write(whitespaces); }
            return;
        }

        // Newline after the spaces (if any)
        const lineBreak = scanLineBreak();
        // After a line break a simple key may start on the following line.
        allowSimpleKey_ = true;

        // True if the upcoming line is a document separator ("---" or "...")
        // followed by whitespace/EOF - that terminates the plain scalar.
        static bool end(Reader reader_) @safe pure nothrow @nogc
        {
            return ("---" == reader_.prefix(3) || "..." == reader_.prefix(3))
                   && " \t\0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek(3));
        }

        if(end(reader_)) { return; }

        bool extraBreaks = false;

        alias Transaction = SliceBuilder.Transaction;
        // Transaction so the breaks written below are discarded (never committed)
        // when an early return hits a document separator.
        auto transaction = Transaction(reader_.sliceBuilder);
        // A single '\n' is folded to a space later; other breaks are kept literally.
        if(lineBreak != '\n') { reader_.sliceBuilder.write(lineBreak); }
        while(search.canFind(reader_.peek()))
        {
            if(reader_.peekByte() == ' ') { reader_.forward(); }
            else
            {
                const lBreak = scanLineBreak();
                extraBreaks  = true;
                reader_.sliceBuilder.write(lBreak);

                if(end(reader_)) { return; }
            }
        }
        transaction.commit();

        // No line breaks, only a space.
        if(lineBreak == '\n' && !extraBreaks) { reader_.sliceBuilder.write(' '); }
    }
/// Scan handle of a tag token.
2014-07-25 00:32:29 +00:00
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
2014-07-25 14:55:15 +00:00
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
void scanTagHandleToSlice(string name)(const Mark startMark)
2014-07-29 01:11:38 +00:00
@system pure nothrow @nogc
{
dchar c = reader_.peek();
enum contextMsg = "While scanning a " ~ name;
if(c != '!')
{
error(contextMsg, startMark, expected("'!'", c), reader_.mark);
return;
}
uint length = 1;
c = reader_.peek(length);
if(c != ' ')
{
while(c.isAlphaNum || "-_"d.canFind(c))
{
++length;
c = reader_.peek(length);
}
if(c != '!')
{
reader_.forward(length);
error(contextMsg, startMark, expected("'!'", c), reader_.mark);
return;
}
++length;
}
reader_.sliceBuilder.write(reader_.get(length));
2014-07-29 01:11:38 +00:00
}
2011-08-16 12:53:13 +00:00
    /// Scan URI in a tag token.
    ///
    /// Assumes that the caller is building a slice in Reader, and puts the scanned
    /// characters into that slice.
    ///
    /// In case of an error, error_ is set. Use throwIfError() to handle this.
    void scanTagURIToSlice(string name)(const Mark startMark)
        @trusted pure nothrow @nogc
    {
        // Note: we do not check if URI is well-formed.
        dchar c = reader_.peek();
        // Remember the slice length so we can tell below whether anything was scanned.
        const startLen = reader_.sliceBuilder.length;
        {
            uint length = 0;
            // Non-alphanumeric characters allowed in a tag URI.
            mixin FastCharSearch!"-;/?:@&=+$,_.!~*\'()[]%"d search;
            while(c.isAlphaNum || search.canFind(c))
            {
                if(c == '%')
                {
                    // Flush the plain characters scanned so far, then decode
                    // the %XX escape sequence(s) directly into the slice.
                    auto chars = reader_.get(length);
                    reader_.sliceBuilder.write(chars);
                    length = 0;
                    scanURIEscapesToSlice!name(startMark);
                    if(error_) { return; }
                }
                else { ++length; }
                c = reader_.peek(length);
            }
            // Flush any remaining plain characters.
            if(length > 0)
            {
                auto chars = reader_.get(length);
                reader_.sliceBuilder.write(chars);
                length = 0;
            }
        }
        // OK if we scanned something, error otherwise.
        if(reader_.sliceBuilder.length > startLen) { return; }

        enum contextMsg = "While parsing a " ~ name;
        error(contextMsg, startMark, expected("URI", c), reader_.mark);
    }
2011-08-16 12:53:13 +00:00
// Not @nogc yet because std.utf.decode is not @nogc
/// Scan URI escape sequences.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
void scanURIEscapesToSlice(string name)(const Mark startMark)
2014-08-01 23:19:29 +00:00
@system pure nothrow @nogc
2014-07-29 01:10:51 +00:00
{
// URI escapes encode a UTF-8 string. We store UTF-8 code units here for
// decoding into UTF-32.
char[4] bytes;
size_t bytesUsed;
Mark mark = reader_.mark;
// Get one dchar by decoding data from bytes.
//
// This is probably slow, but simple and URI escapes are extremely uncommon
// in YAML.
2014-08-01 23:16:29 +00:00
//
// Returns the number of bytes used by the dchar in bytes on success,
// size_t.max on failure.
2014-07-29 01:10:51 +00:00
static size_t getDchar(char[] bytes, Reader reader_)
{
size_t nextChar;
2014-08-01 23:16:29 +00:00
dchar c;
if(bytes[0] < 0x80)
{
c = bytes[0];
++nextChar;
}
else
{
const decoded = decodeUTF8NoGC!(No.validated)(bytes[], nextChar);
if(decoded.errorMessage !is null) { return size_t.max; }
c = decoded.decoded;
}
reader_.sliceBuilder.write(c);
2014-07-29 01:10:51 +00:00
if(bytes.length - nextChar > 0)
{
core.stdc.string.memmove(bytes.ptr, bytes.ptr + nextChar,
bytes.length - nextChar);
}
return bytes.length - nextChar;
}
enum contextMsg = "While scanning a " ~ name;
while(reader_.peekByte() == '%')
2014-07-29 01:10:51 +00:00
{
2014-08-01 23:16:29 +00:00
reader_.forward();
if(bytesUsed == bytes.length)
2014-07-29 01:10:51 +00:00
{
2014-08-01 23:16:29 +00:00
bytesUsed = getDchar(bytes[], reader_);
if(bytesUsed == size_t.max)
2014-07-29 01:10:51 +00:00
{
2014-08-01 23:19:29 +00:00
error(contextMsg, startMark,
2014-08-01 23:16:29 +00:00
"Invalid UTF-8 data encoded in URI escape sequence",
reader_.mark);
return;
2014-07-29 01:10:51 +00:00
}
2014-08-01 23:16:29 +00:00
}
2014-07-29 01:10:51 +00:00
2014-08-01 23:16:29 +00:00
char b = 0;
uint mult = 16;
// Converting 2 hexadecimal digits to a byte.
foreach(k; 0 .. 2)
{
const dchar c = reader_.peek(k);
if(!c.isHexDigit)
{
2014-08-01 23:16:29 +00:00
auto msg = expected("URI escape sequence of 2 hexadecimal "
"numbers", c);
error(contextMsg, startMark, msg, reader_.mark);
return;
}
2014-08-01 23:16:29 +00:00
uint digit;
if(c - '0' < 10) { digit = c - '0'; }
else if(c - 'A' < 6) { digit = c - 'A'; }
else if(c - 'a' < 6) { digit = c - 'a'; }
else { assert(false); }
b += mult * digit;
mult /= 16;
}
2014-08-01 23:16:29 +00:00
bytes[bytesUsed++] = b;
2014-08-01 23:16:29 +00:00
reader_.forward(2);
2011-08-16 12:53:13 +00:00
}
2014-08-01 23:16:29 +00:00
bytesUsed = getDchar(bytes[0 .. bytesUsed], reader_);
2011-08-16 12:53:13 +00:00
}
2014-07-23 00:17:19 +00:00
/// Scan a line break, if any.
///
/// Transforms:
/// '\r\n' : '\n'
/// '\r' : '\n'
/// '\n' : '\n'
/// '\u0085' : '\n'
/// '\u2028' : '\u2028'
/// '\u2029 : '\u2029'
/// no break : '\0'
dchar scanLineBreak() @safe pure nothrow @nogc
2014-07-29 01:07:57 +00:00
{
// Fast path for ASCII line breaks.
const b = reader_.peekByte();
if(b < 0x80)
{
if(b == '\n' || b == '\r')
{
if(reader_.prefix(2) == "\r\n") { reader_.forward(2); }
else { reader_.forward(); }
return '\n';
}
return '\0';
}
2014-07-29 01:07:57 +00:00
const c = reader_.peek();
if(c == '\x85')
2014-07-29 01:07:57 +00:00
{
reader_.forward();
2014-07-29 01:07:57 +00:00
return '\n';
}
if(c == '\u2028' || c == '\u2029')
{
reader_.forward();
return c;
}
return '\0';
}
2011-08-16 12:53:13 +00:00
}
private:
/// A nothrow function that converts a dchar[] to a string.
///
/// The input is expected to be valid UTF-32 produced by the scanner; any
/// conversion failure is a scanner bug and trips an assertion.
string utf32To8(C)(C[] str) @safe pure nothrow
    if(is(Unqual!C == dchar))
{
    try
    {
        // std.conv.to re-encodes the UTF-32 input as UTF-8.
        return str.to!string;
    }
    catch(ConvException e)
    {
        assert(false, "Unexpected invalid UTF-32 string");
    }
    catch(Exception e)
    {
        assert(false, "Unexpected exception during UTF-8 encoding");
    }
}