2011-08-16 12:53:13 +00:00
|
|
|
|
2014-07-23 00:54:50 +00:00
|
|
|
// Copyright Ferdinand Majerech 2011-2014.
|
2011-08-16 12:53:13 +00:00
|
|
|
// Distributed under the Boost Software License, Version 1.0.
|
|
|
|
// (See accompanying file LICENSE_1_0.txt or copy at
|
|
|
|
// http://www.boost.org/LICENSE_1_0.txt)
|
|
|
|
|
|
|
|
/**
|
|
|
|
* YAML scanner.
|
|
|
|
* Code based on PyYAML: http://www.pyyaml.org
|
|
|
|
*/
|
|
|
|
module dyaml.scanner;
|
|
|
|
|
|
|
|
|
|
|
|
import core.stdc.string;
|
|
|
|
|
|
|
|
import std.algorithm;
|
|
|
|
import std.array;
|
2011-10-29 16:21:44 +00:00
|
|
|
import std.container;
|
2011-08-16 12:53:13 +00:00
|
|
|
import std.conv;
|
|
|
|
import std.ascii : isAlphaNum, isDigit, isHexDigit;
|
|
|
|
import std.exception;
|
|
|
|
import std.string;
|
|
|
|
import std.typecons;
|
2014-07-24 01:06:24 +00:00
|
|
|
import std.traits : Unqual;
|
2011-08-16 12:53:13 +00:00
|
|
|
import std.utf;
|
|
|
|
|
2011-10-24 18:36:26 +00:00
|
|
|
import dyaml.fastcharsearch;
|
2011-10-23 18:17:37 +00:00
|
|
|
import dyaml.escapes;
|
2011-08-16 12:53:13 +00:00
|
|
|
import dyaml.exception;
|
2014-07-25 14:53:40 +00:00
|
|
|
import dyaml.nogcutil;
|
2011-10-20 08:34:34 +00:00
|
|
|
import dyaml.queue;
|
2011-08-16 12:53:13 +00:00
|
|
|
import dyaml.reader;
|
2011-10-27 21:13:14 +00:00
|
|
|
import dyaml.style;
|
2011-08-16 12:53:13 +00:00
|
|
|
import dyaml.token;
|
|
|
|
|
|
|
|
package:
|
|
|
|
/**
|
|
|
|
* Scanner produces tokens of the following types:
|
|
|
|
* STREAM-START
|
|
|
|
* STREAM-END
|
|
|
|
* DIRECTIVE(name, value)
|
|
|
|
* DOCUMENT-START
|
|
|
|
* DOCUMENT-END
|
|
|
|
* BLOCK-SEQUENCE-START
|
|
|
|
* BLOCK-MAPPING-START
|
|
|
|
* BLOCK-END
|
|
|
|
* FLOW-SEQUENCE-START
|
|
|
|
* FLOW-MAPPING-START
|
|
|
|
* FLOW-SEQUENCE-END
|
|
|
|
* FLOW-MAPPING-END
|
|
|
|
* BLOCK-ENTRY
|
|
|
|
* FLOW-ENTRY
|
|
|
|
* KEY
|
|
|
|
* VALUE
|
|
|
|
* ALIAS(value)
|
|
|
|
* ANCHOR(value)
|
|
|
|
* TAG(value)
|
|
|
|
* SCALAR(value, plain, style)
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Marked exception thrown at scanner errors.
|
|
|
|
*
|
|
|
|
* See_Also: MarkedYAMLException
|
|
|
|
*/
|
|
|
|
class ScannerException : MarkedYAMLException
|
|
|
|
{
|
2011-10-13 09:30:14 +00:00
|
|
|
mixin MarkedExceptionCtors;
|
2011-08-16 12:53:13 +00:00
|
|
|
}
|
|
|
|
|
2011-10-18 14:12:22 +00:00
|
|
|
private alias ScannerException Error;
|
2014-07-23 16:25:46 +00:00
|
|
|
private alias MarkedYAMLExceptionData ErrorData;
|
2011-10-18 14:12:22 +00:00
|
|
|
|
2014-07-23 16:26:39 +00:00
|
|
|
/// Generates tokens from data provided by a Reader.
|
2011-08-16 12:53:13 +00:00
|
|
|
final class Scanner
|
|
|
|
{
|
|
|
|
private:
|
2014-07-23 16:26:39 +00:00
|
|
|
/// A simple key is a key that is not denoted by the '?' indicator.
|
|
|
|
/// For example:
|
|
|
|
/// ---
|
|
|
|
/// block simple key: value
|
|
|
|
/// ? not a simple key:
|
|
|
|
/// : { flow simple key: value }
|
|
|
|
/// We emit the KEY token before all keys, so when we find a potential simple
|
|
|
|
/// key, we try to locate the corresponding ':' indicator. Simple keys should be
|
|
|
|
/// limited to a single line and 1024 characters.
|
|
|
|
///
|
|
|
|
/// 16 bytes on 64-bit.
|
2011-08-16 12:53:13 +00:00
|
|
|
static struct SimpleKey
{
    /// Character index in reader where the key starts.
    uint charIndex = uint.max;
    /// Index of the key token from start (first token scanned being 0).
    uint tokenIndex;
    /// Line the key starts at.
    uint line;
    /// Column the key starts at (clamped to ushort.max for larger columns).
    ushort column;
    /// Is this required to be a simple key?
    bool required;
    /// Is this struct "null" (invalid)?
    bool isNull;
}
|
|
|
|
|
2014-07-23 16:26:39 +00:00
|
|
|
/// Block chomping types.
|
2011-08-16 12:53:13 +00:00
|
|
|
enum Chomping
{
    /// Strip all trailing line breaks. '-' indicator.
    Strip,
    /// Line break of the last line is preserved, others discarded. Default.
    Clip,
    /// All trailing line breaks are preserved. '+' indicator.
    Keep
}
|
|
|
|
|
2014-07-23 16:26:39 +00:00
|
|
|
/// Reader used to read from a file/stream.
Reader reader_;
/// Are we done scanning?
bool done_;

/// Level of nesting in flow context. If 0, we're in block context.
uint flowLevel_;
/// Current indentation level.
int indent_ = -1;
/// Past indentation levels. Used as a stack.
Array!int indents_;

/// Processed tokens not yet emitted. Used as a queue.
Queue!Token tokens_;

/// Number of tokens emitted through the getToken method.
uint tokensTaken_;

/// Can a simple key start at the current position? A simple key may start:
/// - at the beginning of the line, not counting indentation spaces
///   (in block context),
/// - after '{', '[', ',' (in the flow context),
/// - after '?', ':', '-' (in the block context).
/// In the block context, this flag also signifies if a block collection
/// may start at the current position.
bool allowSimpleKey_ = true;

/// Possible simple keys indexed by flow levels.
SimpleKey[] possibleSimpleKeys_;

/// Used for constructing strings while limiting reallocation.
Appender!(dchar[]) appender_;

/// Set on error by nothrow/@nogc inner functions along with errorData_.
///
/// Non-nothrow/GC-using caller functions can then throw an exception using
/// data stored in errorData_.
bool error_;

/// Data for the exception to throw if error_ is true.
ErrorData errorData_;

/// Error messages can be built in this buffer without using the GC.
///
/// ScannerException (MarkedYAMLException) copies string data passed to its
/// constructor so it's safe to use slices of this buffer as parameters for
/// exceptions that may outlive the Scanner. The GC allocation when creating the
/// error message is removed, but the allocation when creating an exception is
/// not.
char[256] msgBuffer_;
|
|
|
|
|
2011-08-16 12:53:13 +00:00
|
|
|
public:
|
2014-07-25 16:26:23 +00:00
|
|
|
/// Construct a Scanner using specified Reader.
|
2014-07-24 01:50:39 +00:00
|
|
|
this(Reader reader) @safe nothrow
{
    reader_ = reader;
    appender_ = appender!(dchar[])();
    // Scanning begins by queuing the STREAM-START token.
    fetchStreamStart();
}
|
|
|
|
|
2014-07-25 16:26:23 +00:00
|
|
|
/// Destroy the scanner.
|
2012-09-08 23:42:13 +00:00
|
|
|
@trusted ~this()
{
    // Release queue/array storage and break the reference to the Reader.
    tokens_.destroy();
    indents_.destroy();
    possibleSimpleKeys_.destroy();
    possibleSimpleKeys_ = null;
    appender_.destroy();
    reader_ = null;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Check if the next token is one of specified types.
|
|
|
|
*
|
|
|
|
* If no types are specified, checks if any tokens are left.
|
|
|
|
*
|
|
|
|
* Params: ids = Token IDs to check for.
|
|
|
|
*
|
|
|
|
* Returns: true if the next token is one of specified types,
|
|
|
|
* or if there are any tokens left if no types specified.
|
|
|
|
* false otherwise.
|
|
|
|
*/
|
2012-12-27 20:21:56 +00:00
|
|
|
bool checkToken(const TokenID[] ids ...) @safe
{
    // Scan ahead until the next emittable token is definitely in the queue.
    while(needMoreTokens()) { fetchToken(); }

    // No tokens at all: nothing can match.
    if(tokens_.empty) { return false; }

    // With no IDs given, we only ask whether any token is left.
    if(ids.length == 0) { return true; }

    // Otherwise the next token's ID must be one of the requested ones.
    return ids.canFind(tokens_.peek().id);
}
|
|
|
|
|
|
|
|
/**
|
2014-07-22 00:12:18 +00:00
|
|
|
* Return the next token, but keep it in the queue.
|
2011-08-16 12:53:13 +00:00
|
|
|
*
|
|
|
|
* Must not be called if there are no tokens left.
|
|
|
|
*/
|
2012-09-08 23:42:13 +00:00
|
|
|
ref const(Token) peekToken() @safe
{
    // Ensure the head of the queue is a definite token.
    while(needMoreTokens) { fetchToken(); }
    // Calling with an exhausted stream is a contract violation.
    // (assert(false) is kept even in release builds.)
    if(tokens_.empty) { assert(false, "No token left to peek"); }
    return tokens_.peek();
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Return the next token, removing it from the queue.
|
|
|
|
*
|
|
|
|
* Must not be called if there are no tokens left.
|
|
|
|
*/
|
2012-09-08 23:42:13 +00:00
|
|
|
Token getToken() @safe
{
    // Ensure the head of the queue is a definite token.
    while(needMoreTokens) { fetchToken(); }
    // Calling with an exhausted stream is a contract violation.
    // (assert(false) is kept even in release builds.)
    if(tokens_.empty) { assert(false, "No token left to get"); }
    ++tokensTaken_;
    return tokens_.pop();
}
|
|
|
|
|
|
|
|
private:
|
2014-07-25 16:26:08 +00:00
|
|
|
/// Build an error message in msgBuffer_ and return it as a string.
|
|
|
|
string buildMsg(S ...)(S args) @trusted pure nothrow @nogc
{
    // The returned slice aliases msgBuffer_; it is only valid until the buffer
    // is reused. ScannerException copies the message, so passing the slice to
    // an exception constructor is safe (see msgBuffer_ docs).
    return cast(string)msgBuffer_.printNoGC(args);
}
|
|
|
|
|
|
|
|
/// If error_ is true, throws a ScannerException constructed from errorData_ and
|
|
|
|
/// sets error_ to false.
|
|
|
|
void throwIfError() @safe pure
{
    if(error_)
    {
        // Clear the flag first so the scanner is in a clean state if the
        // exception is caught and scanning continues.
        error_ = false;
        throw new ScannerException(errorData_);
    }
}
|
|
|
|
|
|
|
|
/// Called by internal nothrow/@nogc methods to set an error to be thrown by
|
|
|
|
/// their callers.
|
|
|
|
///
|
|
|
|
/// See_Also: dyaml.exception.MarkedYamlException
|
|
|
|
void setError(string context, const Mark contextMark, string problem,
              const Mark problemMark) @safe pure nothrow @nogc
{
    // Only one pending error is supported; callers must throwIfError() before
    // another error can be recorded.
    assert(error_ == false,
           "Setting an error when there already is a not yet thrown error");
    error_ = true;
    errorData_ = MarkedYAMLExceptionData(context, contextMark, problem, problemMark);
}
|
|
|
|
|
2011-08-16 12:53:13 +00:00
|
|
|
///Determine whether or not we need to fetch more tokens before peeking/getting a token.
|
2014-07-24 01:50:39 +00:00
|
|
|
bool needMoreTokens() @safe pure
{
    if(done_) {return false;}
    if(tokens_.empty){return true;}

    // The current token may be a potential simple key, so we need to look further:
    // first drop stale candidates, then check whether the next token to emit is
    // still an unresolved simple-key candidate.
    stalePossibleSimpleKeys();
    return nextPossibleSimpleKey() == tokensTaken_;
}
|
|
|
|
|
|
|
|
///Fetch at token, adding it to tokens_.
|
2014-07-24 01:50:39 +00:00
|
|
|
void fetchToken() @safe
{
    // Eat whitespaces and comments until we reach the next token.
    scanToNextToken();

    // Remove obsolete possible simple keys.
    stalePossibleSimpleKeys();

    // Compare current indentation and column. It may add some tokens
    // and decrease the current indentation level.
    unwindIndent(reader_.column);

    // Get the next character.
    const dchar c = reader_.peek();

    // Fetch the token. The first four checks are ordered (stream end and
    // line-start indicators take precedence).
    if(c == '\0')            {return fetchStreamEnd();}
    if(checkDirective())     {return fetchDirective();}
    if(checkDocumentStart()) {return fetchDocumentStart();}
    if(checkDocumentEnd())   {return fetchDocumentEnd();}
    // Order of the following checks is NOT significant.
    if(c == '[')             {return fetchFlowSequenceStart();}
    if(c == '{')             {return fetchFlowMappingStart();}
    if(c == ']')             {return fetchFlowSequenceEnd();}
    if(c == '}')             {return fetchFlowMappingEnd();}
    if(c == ',')             {return fetchFlowEntry();}
    if(checkBlockEntry())    {return fetchBlockEntry();}
    if(checkKey())           {return fetchKey();}
    if(checkValue())         {return fetchValue();}
    if(c == '*')             {return fetchAlias();}
    if(c == '&')             {return fetchAnchor();}
    if(c == '!')             {return fetchTag();}
    // Block scalars ('|', '>') are only recognized in block context.
    if(c == '|' && flowLevel_ == 0){return fetchLiteral();}
    if(c == '>' && flowLevel_ == 0){return fetchFolded();}
    if(c == '\'')            {return fetchSingle();}
    if(c == '\"')            {return fetchDouble();}
    if(checkPlain())         {return fetchPlain();}

    // No token can start with this character.
    throw new Error(format("While scanning for the next token, found "
                    "character \'%s\', index %s that cannot start any token"
                    , c, to!int(c)), reader_.mark);
}
|
|
|
|
|
|
|
|
|
|
|
|
///Return the token number of the nearest possible simple key.
|
2014-07-24 01:50:39 +00:00
|
|
|
uint nextPossibleSimpleKey() @safe pure nothrow @nogc
{
    // Find the smallest token index among all live simple-key candidates.
    // uint.max means no candidate exists.
    uint result = uint.max;
    foreach(ref candidate; possibleSimpleKeys_)
    {
        if(candidate.isNull) { continue; }
        if(candidate.tokenIndex < result) { result = candidate.tokenIndex; }
    }
    return result;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Remove entries that are no longer possible simple keys.
|
|
|
|
*
|
|
|
|
* According to the YAML specification, simple keys
|
|
|
|
* - should be limited to a single line,
|
|
|
|
* - should be no longer than 1024 characters.
|
|
|
|
* Disabling this will allow simple keys of any length and
|
|
|
|
* height (may cause problems if indentation is broken though).
|
|
|
|
*/
|
2014-07-24 01:50:39 +00:00
|
|
|
void stalePossibleSimpleKeys() @safe pure
{
    foreach(level, ref key; possibleSimpleKeys_)
    {
        if(key.isNull){continue;}
        // A simple key must fit on one line and within 1024 characters.
        if(key.line != reader_.line || reader_.charIndex - key.charIndex > 1024)
        {
            // A required key that expires without its ':' is an error.
            enforce(!key.required,
                    new Error("While scanning a simple key",
                              Mark(key.line, key.column),
                              "could not find expected ':'", reader_.mark));
            key.isNull = true;
        }
    }
}
|
|
|
|
|
|
|
|
/**
|
2014-07-22 00:12:18 +00:00
|
|
|
* Check if the next token starts a possible simple key and if so, save its position.
|
|
|
|
*
|
2011-08-16 12:53:13 +00:00
|
|
|
* This function is called for ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.
|
|
|
|
*/
|
2014-07-24 01:50:39 +00:00
|
|
|
void savePossibleSimpleKey() @safe pure
{
    // Check if a simple key is required at the current position.
    const required = (flowLevel_ == 0 && indent_ == reader_.column);
    assert(allowSimpleKey_ || !required, "A simple key is required only if it is "
           "the first token in the current line. Therefore it is always allowed.");

    if(!allowSimpleKey_){return;}

    // The next token might be a simple key, so save its number and position.
    removePossibleSimpleKey();
    const tokenCount = tokensTaken_ + cast(uint)tokens_.length;

    const line = reader_.line;
    const column = reader_.column;
    // Column is clamped so it fits SimpleKey's 16-bit field.
    const key = SimpleKey(cast(uint)reader_.charIndex,
                          tokenCount,
                          line,
                          column < ushort.max ? cast(ushort)column : ushort.max,
                          required);

    // Grow the per-flow-level candidate array if needed.
    if(possibleSimpleKeys_.length <= flowLevel_)
    {
        const oldLength = possibleSimpleKeys_.length;
        possibleSimpleKeys_.length = flowLevel_ + 1;
        // No need to initialize the last element; it is assigned below.
        possibleSimpleKeys_[oldLength .. flowLevel_] = SimpleKey.init;
    }
    possibleSimpleKeys_[flowLevel_] = key;
}
|
|
|
|
|
|
|
|
///Remove the saved possible key position at the current flow level.
|
2014-07-24 01:50:39 +00:00
|
|
|
void removePossibleSimpleKey() @safe pure
{
    if(possibleSimpleKeys_.length <= flowLevel_){return;}

    if(!possibleSimpleKeys_[flowLevel_].isNull)
    {
        const key = possibleSimpleKeys_[flowLevel_];
        // Discarding a required key without having found its ':' is an error.
        enforce(!key.required,
                new Error("While scanning a simple key", Mark(key.line, key.column),
                          "could not find expected ':'", reader_.mark));
        possibleSimpleKeys_[flowLevel_].isNull = true;
    }
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Decrease indentation, removing entries in indents_.
|
|
|
|
*
|
|
|
|
* Params: column = Current column in the file/stream.
|
|
|
|
*/
|
2012-12-27 20:21:56 +00:00
|
|
|
void unwindIndent(const int column) @trusted
{
    if(flowLevel_ > 0)
    {
        // In flow context, tokens should respect indentation.
        // The condition should be `indent >= column` according to the spec.
        // But this condition will prohibit intuitively correct
        // constructions such as
        // key : {
        // }

        // In the flow context, indentation is ignored. We make the scanner less
        // restrictive than what the specification requires.
        //if(pedantic_ && flowLevel_ > 0 && indent_ > column)
        //{
        //    throw new Error("Invalid indentation or unclosed '[' or '{'",
        //                    reader_.mark)
        //}
        return;
    }

    // In block context, we may need to issue the BLOCK-END tokens:
    // pop indentation levels until we are back at or left of `column`.
    while(indent_ > column)
    {
        indent_ = indents_.back;
        indents_.length = indents_.length - 1;
        tokens_.push(blockEndToken(reader_.mark, reader_.mark));
    }
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Increase indentation if needed.
|
|
|
|
*
|
|
|
|
* Params: column = Current column in the file/stream.
|
|
|
|
*
|
|
|
|
* Returns: true if the indentation was increased, false otherwise.
|
|
|
|
*/
|
2012-09-08 23:42:13 +00:00
|
|
|
bool addIndent(int column) @trusted
{
    // Only push a new level when we are actually moving deeper.
    if(column > indent_)
    {
        indents_ ~= indent_;
        indent_ = column;
        return true;
    }
    return false;
}
|
|
|
|
|
|
|
|
|
|
|
|
///Add STREAM-START token.
|
2012-09-08 23:42:13 +00:00
|
|
|
void fetchStreamStart() @safe nothrow
{
    // The STREAM-START token carries the encoding detected by the Reader.
    tokens_.push(streamStartToken(reader_.mark, reader_.mark, reader_.encoding));
}
|
|
|
|
|
|
|
|
///Add STREAM-END token.
|
2012-09-08 23:42:13 +00:00
|
|
|
void fetchStreamEnd() @safe
{
    // Set indentation to -1.
    unwindIndent(-1);
    removePossibleSimpleKey();
    allowSimpleKey_ = false;
    possibleSimpleKeys_.destroy;

    tokens_.push(streamEndToken(reader_.mark, reader_.mark));
    // No further tokens will be fetched after STREAM-END.
    done_ = true;
}
|
|
|
|
|
|
|
|
///Add DIRECTIVE token.
|
2012-09-08 23:42:13 +00:00
|
|
|
void fetchDirective() @safe
{
    // Set indentation to -1.
    unwindIndent(-1);
    // Reset simple keys.
    removePossibleSimpleKey();
    allowSimpleKey_ = false;

    tokens_.push(scanDirective());
}
|
|
|
|
|
|
|
|
///Add DOCUMENT-START or DOCUMENT-END token.
|
2012-09-08 23:42:13 +00:00
|
|
|
void fetchDocumentIndicator(TokenID id)() @safe
    if(id == TokenID.DocumentStart || id == TokenID.DocumentEnd)
{
    // Set indentation to -1.
    unwindIndent(-1);
    // Reset simple keys. Note that there can't be a block collection after '---'.
    removePossibleSimpleKey();
    allowSimpleKey_ = false;

    Mark startMark = reader_.mark;
    // Skip the 3 indicator characters.
    reader_.forward(3);
    tokens_.push(simpleToken!id(startMark, reader_.mark));
}

/// Aliases to add DOCUMENT-START or DOCUMENT-END token.
alias fetchDocumentIndicator!(TokenID.DocumentStart) fetchDocumentStart;
alias fetchDocumentIndicator!(TokenID.DocumentEnd) fetchDocumentEnd;
|
|
|
|
|
|
|
|
///Add FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
|
2012-09-08 23:42:13 +00:00
|
|
|
void fetchFlowCollectionStart(TokenID id)() @trusted
{
    // '[' and '{' may start a simple key.
    savePossibleSimpleKey();
    // Simple keys are allowed after '[' and '{'.
    allowSimpleKey_ = true;
    ++flowLevel_;

    Mark startMark = reader_.mark;
    reader_.forward();
    tokens_.push(simpleToken!id(startMark, reader_.mark));
}

/// Aliases to add FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
alias fetchFlowCollectionStart!(TokenID.FlowSequenceStart) fetchFlowSequenceStart;
alias fetchFlowCollectionStart!(TokenID.FlowMappingStart) fetchFlowMappingStart;
|
|
|
|
|
|
|
|
///Add FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
void fetchFlowCollectionEnd(TokenID id)() @safe
{
    // Reset possible simple key on the current level.
    removePossibleSimpleKey();
    // No simple keys after ']' and '}'.
    allowSimpleKey_ = false;
    --flowLevel_;

    Mark startMark = reader_.mark;
    reader_.forward();
    tokens_.push(simpleToken!id(startMark, reader_.mark));
}

///Aliases to add FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
alias fetchFlowCollectionEnd!(TokenID.FlowSequenceEnd) fetchFlowSequenceEnd;
alias fetchFlowCollectionEnd!(TokenID.FlowMappingEnd) fetchFlowMappingEnd;
|
|
|
|
|
|
|
|
///Add FLOW-ENTRY token;
|
2012-09-08 23:42:13 +00:00
|
|
|
void fetchFlowEntry() @safe
{
    // Reset possible simple key on the current level.
    removePossibleSimpleKey();
    // Simple keys are allowed after ','.
    allowSimpleKey_ = true;

    Mark startMark = reader_.mark;
    reader_.forward();
    tokens_.push(flowEntryToken(startMark, reader_.mark));
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Additional checks used in block context in fetchBlockEntry and fetchKey.
|
|
|
|
*
|
2014-07-22 00:12:18 +00:00
|
|
|
* Params: type = String representing the token type we might need to add.
|
2011-08-16 12:53:13 +00:00
|
|
|
* id = Token type we might need to add.
|
|
|
|
*/
|
2012-09-08 23:42:13 +00:00
|
|
|
void blockChecks(string type, TokenID id)() @safe
{
    // Are we allowed to start a key (not necessarily a simple one)?
    enforce(allowSimpleKey_, new Error(type ~ " keys are not allowed here",
                                       reader_.mark));

    // If this deepens indentation, the entry also opens a new block collection.
    if(addIndent(reader_.column))
    {
        tokens_.push(simpleToken!id(reader_.mark, reader_.mark));
    }
}
|
|
|
|
|
|
|
|
///Add BLOCK-ENTRY token. Might add BLOCK-SEQUENCE-START in the process.
|
2012-09-08 23:42:13 +00:00
|
|
|
void fetchBlockEntry() @safe
{
    if(flowLevel_ == 0){blockChecks!("Sequence", TokenID.BlockSequenceStart)();}

    // It's an error for the block entry to occur in the flow context,
    // but we let the parser detect this.

    // Reset possible simple key on the current level.
    removePossibleSimpleKey();
    // Simple keys are allowed after '-'.
    allowSimpleKey_ = true;

    Mark startMark = reader_.mark;
    reader_.forward();
    tokens_.push(blockEntryToken(startMark, reader_.mark));
}
|
|
|
|
|
|
|
|
///Add KEY token. Might add BLOCK-MAPPING-START in the process.
|
2012-09-08 23:42:13 +00:00
|
|
|
void fetchKey() @safe
{
    // In block context, '?' may also open a new block mapping.
    if(flowLevel_ == 0){blockChecks!("Mapping", TokenID.BlockMappingStart)();}

    // Reset possible simple key on the current level.
    removePossibleSimpleKey();
    // Simple keys are allowed after '?' in the block context.
    allowSimpleKey_ = (flowLevel_ == 0);

    Mark startMark = reader_.mark;
    reader_.forward();
    tokens_.push(keyToken(startMark, reader_.mark));
}
|
|
|
|
|
|
|
|
///Add VALUE token. Might add KEY and/or BLOCK-MAPPING-START in the process.
|
2012-09-08 23:42:13 +00:00
|
|
|
void fetchValue() @safe
{
    // Do we determine a simple key?
    if(possibleSimpleKeys_.length > flowLevel_ &&
       !possibleSimpleKeys_[flowLevel_].isNull)
    {
        // Consume the saved candidate; it is now a confirmed key.
        const key = possibleSimpleKeys_[flowLevel_];
        possibleSimpleKeys_[flowLevel_].isNull = true;
        Mark keyMark = Mark(key.line, key.column);
        // Position of the key token within the not-yet-emitted queue.
        const idx = key.tokenIndex - tokensTaken_;

        assert(idx >= 0);

        // Add KEY.
        // Manually inserting since tokens are immutable (need linked list).
        tokens_.insert(keyToken(keyMark, keyMark), idx);

        // If this key starts a new block mapping, we need to add BLOCK-MAPPING-START.
        if(flowLevel_ == 0 && addIndent(key.column))
        {
            tokens_.insert(blockMappingStartToken(keyMark, keyMark), idx);
        }

        // There cannot be two simple keys in a row.
        allowSimpleKey_ = false;
    }
    // Part of a complex key
    else
    {
        // We can start a complex value if and only if we can start a simple key.
        enforce(flowLevel_ > 0 || allowSimpleKey_,
                new Error("Mapping values are not allowed here", reader_.mark));

        // If this value starts a new block mapping, we need to add
        // BLOCK-MAPPING-START. It'll be detected as an error later by the parser.
        if(flowLevel_ == 0 && addIndent(reader_.column))
        {
            tokens_.push(blockMappingStartToken(reader_.mark, reader_.mark));
        }

        // Reset possible simple key on the current level.
        removePossibleSimpleKey();
        // Simple keys are allowed after ':' in the block context.
        allowSimpleKey_ = (flowLevel_ == 0);
    }

    // Add VALUE.
    Mark startMark = reader_.mark;
    reader_.forward();
    tokens_.push(valueToken(startMark, reader_.mark));
}
|
|
|
|
|
|
|
|
///Add ALIAS or ANCHOR token.
|
2012-09-08 23:42:13 +00:00
|
|
|
void fetchAnchor_(TokenID id)() @trusted
    if(id == TokenID.Alias || id == TokenID.Anchor)
{
    // ALIAS/ANCHOR could be a simple key.
    savePossibleSimpleKey();
    // No simple keys after ALIAS/ANCHOR.
    allowSimpleKey_ = false;

    tokens_.push(scanAnchor(id));
}

/// Aliases to add ALIAS or ANCHOR token.
alias fetchAnchor_!(TokenID.Alias) fetchAlias;
alias fetchAnchor_!(TokenID.Anchor) fetchAnchor;
|
|
|
|
|
|
|
|
///Add TAG token.
|
2012-09-08 23:42:13 +00:00
|
|
|
void fetchTag() @trusted
{
    // TAG could start a simple key.
    savePossibleSimpleKey();
    // No simple keys after TAG.
    allowSimpleKey_ = false;

    tokens_.push(scanTag());
    // scanTag() may record an error via setError() instead of throwing;
    // rethrow any such error here.
    throwIfError();
}
|
|
|
|
|
|
|
|
///Add block SCALAR token.
|
2012-09-08 23:42:13 +00:00
|
|
|
void fetchBlockScalar(ScalarStyle style)() @trusted
    if(style == ScalarStyle.Literal || style == ScalarStyle.Folded)
{
    // Reset possible simple key on the current level.
    removePossibleSimpleKey();
    // A simple key may follow a block scalar.
    allowSimpleKey_ = true;

    auto blockScalar = scanBlockScalar(style);
    // scanBlockScalar() may record an error via setError(); rethrow it before
    // queuing the (possibly invalid) token.
    throwIfError();
    tokens_.push(blockScalar);
}

/// Aliases to add literal or folded block scalar.
alias fetchBlockScalar!(ScalarStyle.Literal) fetchLiteral;
alias fetchBlockScalar!(ScalarStyle.Folded) fetchFolded;
|
|
|
|
|
2014-07-24 21:22:14 +00:00
|
|
|
/// Add quoted flow SCALAR token.
|
|
|
|
void fetchFlowScalar(ScalarStyle quotes)() @safe
{
    // A flow scalar may act as a simple key.
    savePossibleSimpleKey();
    // Once the scalar is read, no simple key can start here.
    allowSimpleKey_ = false;

    // Scan the quoted scalar, surface any scan error, then enqueue it.
    const token = scanFlowScalar(quotes);
    throwIfError();
    tokens_.push(token);
}
|
|
|
|
|
2014-07-24 21:22:26 +00:00
|
|
|
/// Aliases to add single or double quoted block scalar.
|
2011-08-16 12:53:13 +00:00
|
|
|
// Add a single quoted flow scalar token.
alias fetchFlowScalar!(ScalarStyle.SingleQuoted) fetchSingle;
// Add a double quoted flow scalar token.
alias fetchFlowScalar!(ScalarStyle.DoubleQuoted) fetchDouble;
|
|
|
|
|
2014-07-24 16:43:33 +00:00
|
|
|
/// Add plain SCALAR token.
|
2014-07-24 21:22:14 +00:00
|
|
|
void fetchPlain() @safe
{
    // A plain scalar could be a simple key.
    savePossibleSimpleKey();
    // No simple keys after plain scalars. But note that scanPlain() will
    // change this flag if the scan is finished at the beginning of the line.
    allowSimpleKey_ = false;

    // Scan and add SCALAR. May change allowSimpleKey_.
    const token = scanPlain();
    throwIfError();
    tokens_.push(token);
}
|
|
|
|
|
|
|
|
///Check if the next token is DIRECTIVE: ^ '%' ...
|
2014-07-24 01:50:39 +00:00
|
|
|
bool checkDirective() @safe pure nothrow @nogc
{
    // A '%' only introduces a directive at the start of a line.
    return reader_.column == 0 && reader_.peek() == '%';
}
|
2011-08-16 12:53:13 +00:00
|
|
|
|
2014-07-23 00:17:19 +00:00
|
|
|
/// Check if the next token is DOCUMENT-START: ^ '---' (' '|'\n')
|
2014-07-24 16:43:46 +00:00
|
|
|
bool checkDocumentStart() @safe
{
    // Cheapest checks first (column, a single char), then the full 3-char
    // prefix, to prevent reading outside the buffer.
    if(reader_.column != 0 || reader_.peek() != '-') { return false; }
    return reader_.prefix(3) == "---" &&
           " \t\0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek(3));
}
|
|
|
|
|
2014-07-23 00:17:19 +00:00
|
|
|
/// Check if the next token is DOCUMENT-END: ^ '...' (' '|'\n')
|
2014-07-24 01:50:39 +00:00
|
|
|
bool checkDocumentEnd() @safe pure nothrow @nogc
{
    // Cheapest checks first (column, a single char), then the full 3-char
    // prefix, to prevent reading outside the buffer.
    if(reader_.column != 0 || reader_.peek() != '.') { return false; }
    return reader_.prefix(3) == "..." &&
           " \t\0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek(3));
}
|
|
|
|
|
|
|
|
///Check if the next token is BLOCK-ENTRY: '-' (' '|'\n')
|
2014-07-24 01:50:39 +00:00
|
|
|
bool checkBlockEntry() @safe pure nothrow @nogc
{
    // '-' followed by whitespace, EOF or a line break.
    if(reader_.peek() != '-') { return false; }
    return " \t\0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek(1));
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Check if the next token is KEY(flow context): '?'
|
2014-07-22 00:12:18 +00:00
|
|
|
*
|
2011-08-16 12:53:13 +00:00
|
|
|
* or KEY(block context): '?' (' '|'\n')
|
|
|
|
*/
|
2014-07-24 01:50:39 +00:00
|
|
|
bool checkKey() @safe pure nothrow @nogc
{
    if(reader_.peek() != '?') { return false; }
    // In flow context, a '?' alone starts a key; in block context it must be
    // followed by whitespace, EOF or a line break.
    return flowLevel_ > 0 ||
           " \t\0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek(1));
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Check if the next token is VALUE(flow context): ':'
|
2014-07-22 00:12:18 +00:00
|
|
|
*
|
2011-08-16 12:53:13 +00:00
|
|
|
* or VALUE(block context): ':' (' '|'\n')
|
|
|
|
*/
|
2014-07-24 01:50:39 +00:00
|
|
|
bool checkValue() @safe pure nothrow @nogc
{
    if(reader_.peek() != ':') { return false; }
    // In flow context, a ':' alone starts a value; in block context it must
    // be followed by whitespace, EOF or a line break.
    return flowLevel_ > 0 ||
           " \t\0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek(1));
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Check if the next token is a plain scalar.
|
|
|
|
*
|
|
|
|
* A plain scalar may start with any non-space character except:
|
|
|
|
* '-', '?', ':', ',', '[', ']', '{', '}',
|
|
|
|
* '#', '&', '*', '!', '|', '>', '\'', '\"',
|
|
|
|
* '%', '@', '`'.
|
2014-07-22 00:12:18 +00:00
|
|
|
*
|
2011-08-16 12:53:13 +00:00
|
|
|
* It may also start with
|
|
|
|
* '-', '?', ':'
|
|
|
|
* if it is followed by a non-space character.
|
2014-07-22 00:12:18 +00:00
|
|
|
*
|
2011-08-16 12:53:13 +00:00
|
|
|
* Note that we limit the last rule to the block context (except the
|
|
|
|
* '-' character) because we want the flow context to be space
|
|
|
|
* independent.
|
|
|
|
*/
|
2014-07-24 01:50:39 +00:00
|
|
|
bool checkPlain() @safe pure nothrow @nogc
{
    const c = reader_.peek();
    // Any character outside the indicator/whitespace set starts a plain scalar.
    if(!"-?:,[]{}#&*!|>\'\"%@` \t\0\n\r\u0085\u2028\u2029"d.canFind(c))
    {
        return true;
    }
    // '-' always, and '?'/':' only in block context, may also start a plain
    // scalar if followed by a non-space character.
    const followedByNonSpace =
        !" \t\0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek(1));
    return followedByNonSpace &&
           (c == '-' || (flowLevel_ == 0 && "?:"d.canFind(c)));
}
|
|
|
|
|
2014-07-23 00:47:46 +00:00
|
|
|
/// Move to the next non-space character.
|
|
|
|
/// Move to the next non-space character.
void findNextNonSpace() @safe pure nothrow @nogc
{
    for(; reader_.peek() == ' '; reader_.forward()) {}
}
|
|
|
|
|
2014-07-23 00:47:46 +00:00
|
|
|
/// Scan a string of alphanumeric or "-_" characters.
|
2014-07-24 01:06:24 +00:00
|
|
|
/// Scan a run of alphanumeric, '-' or '_' characters; throws if the run is empty.
dchar[] scanAlphaNumeric(string name)(const Mark startMark) @safe pure
{
    // Measure the run without consuming it.
    uint len = 0;
    dchar c = reader_.peek();
    while(isAlphaNum(c) || "-_"d.canFind(c))
    {
        ++len;
        c = reader_.peek(len);
    }

    enforce(len > 0,
            new Error("While scanning " ~ name, startMark,
                      "expected alphanumeric, - or _, but found " ~ to!string(c),
                      reader_.mark));

    return reader_.get(len);
}
|
|
|
|
|
2014-07-23 00:47:46 +00:00
|
|
|
/// Scan all characters until next line break.
|
2014-07-24 01:06:24 +00:00
|
|
|
/// Scan all characters until next line break.
dchar[] scanToNextBreak() @safe pure nothrow @nogc
{
    // Count everything up to (not including) EOF or a line break character.
    uint len = 0;
    while(!"\0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek(len))) { ++len; }
    return reader_.get(len);
}
|
|
|
|
|
2014-07-26 00:27:11 +00:00
|
|
|
/// Scan all characters until next line break.
|
|
|
|
///
|
|
|
|
/// Assumes that the caller is building a slice in Reader, and puts the scanned
|
|
|
|
/// characters into that slice.
|
|
|
|
/// Scan all characters until next line break.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
void scanToNextBreakToSlice() @system pure nothrow @nogc
{
    // Delegate to scanToNextBreak() instead of duplicating its scanning loop;
    // the scanned characters are appended to the slice being built.
    reader_.sliceBuilder.write(scanToNextBreak());
}
|
|
|
|
|
2011-08-16 12:53:13 +00:00
|
|
|
/**
|
|
|
|
* Move to next token in the file/stream.
|
|
|
|
*
|
|
|
|
* We ignore spaces, line breaks and comments.
|
|
|
|
* If we find a line break in the block context, we set
|
|
|
|
* allowSimpleKey` on.
|
|
|
|
*
|
|
|
|
* We do not yet support BOM inside the stream as the
|
|
|
|
* specification requires. Any such mark will be considered as a part
|
|
|
|
* of the document.
|
|
|
|
*/
|
2014-07-24 01:50:39 +00:00
|
|
|
/// Move to the next token, skipping spaces, line breaks and comments.
///
/// A line break in block context re-enables simple keys. A BOM inside the
/// stream is not yet supported and is treated as document content.
void scanToNextToken() @safe pure nothrow @nogc
{
    //TODO(PyYAML): We need to make tab handling rules more sane. A good rule is:
    //  Tabs cannot precede tokens
    //  BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END,
    //  KEY(block), VALUE(block), BLOCK-ENTRY
    //So the checking code is
    //  if <TAB>:
    //      allowSimpleKey_ = false
    //We also need to add the check for `allowSimpleKey_ == true` to
    //`unwindIndent` before issuing BLOCK-END.
    //Scanners for block, flow, and plain scalars need to be modified.

    for(;;)
    {
        findNextNonSpace();

        // A comment runs to the end of the line.
        if(reader_.peek() == '#') { scanToNextBreak(); }
        // No line break here means we've reached the next token.
        if(scanLineBreak() == '\0') { break; }
        // In block context, a line break allows a new simple key.
        if(flowLevel_ == 0) { allowSimpleKey_ = true; }
    }
}
|
|
|
|
|
|
|
|
///Scan directive token.
|
2014-07-24 01:50:39 +00:00
|
|
|
/// Scan a directive token (%YAML, %TAG or an unknown directive).
Token scanDirective() @safe pure
{
    Mark startMark = reader_.mark;
    // Skip the '%'.
    reader_.forward();

    auto name = scanDirectiveName(startMark);
    const(dchar)[] value = "";
    if(name == "YAML")     { value = scanYAMLDirectiveValue(startMark); }
    else if(name == "TAG") { value = scanTagDirectiveValue(startMark); }

    Mark endMark = reader_.mark;

    // Unknown directives are skipped up to the line break.
    if(!["YAML"d, "TAG"d].canFind(name)) { scanToNextBreak(); }
    scanDirectiveIgnoredLine(startMark);

    // Directive name and value are stored in a single string, separated by zero.
    return directiveToken(startMark, endMark, utf32To8(name ~ '\0' ~ value));
}
|
|
|
|
|
|
|
|
///Scan name of a directive token.
|
2014-07-24 01:50:39 +00:00
|
|
|
/// Scan the name of a directive token.
dchar[] scanDirectiveName(const Mark startMark) @safe pure
{
    // Scan directive name.
    auto name = scanAlphaNumeric!"a directive"(startMark);

    // The name must be terminated by whitespace, EOF or a line break; the
    // message reflects that (it previously claimed an alphanumeric was
    // expected, which described the opposite condition).
    enforce(" \0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek()),
            new Error("While scanning a directive", startMark,
                      "expected whitespace or a line break after directive name, "
                      ~ "but found " ~ to!string(reader_.peek()), reader_.mark));
    return name;
}
|
|
|
|
|
|
|
|
///Scan value of a YAML directive token. Returns major, minor version separated by '.'.
|
2014-07-24 01:50:39 +00:00
|
|
|
/// Scan value of a YAML directive token. Returns major, minor version separated by '.'.
dchar[] scanYAMLDirectiveValue(const Mark startMark) @safe pure
{
    findNextNonSpace();

    // The version is two dot-separated integer parts, e.g. "1.1".
    dchar[] ver = scanYAMLDirectiveNumber(startMark);
    enforce(reader_.peek() == '.',
            new Error("While scanning a directive", startMark,
                      "expected a digit or '.', but found: "
                      ~ reader_.peek().to!string, reader_.mark));
    // Skip the '.'.
    reader_.forward();

    ver ~= '.' ~ scanYAMLDirectiveNumber(startMark);

    // The value must be terminated by whitespace, EOF or a line break.
    enforce(" \0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek()),
            new Error("While scanning a directive", startMark,
                      "expected a digit or '.', but found: "
                      ~ reader_.peek().to!string, reader_.mark));
    return ver;
}
|
|
|
|
|
2014-07-23 00:47:46 +00:00
|
|
|
/// Scan a number from a YAML directive.
|
2014-07-24 01:06:24 +00:00
|
|
|
/// Scan a number from a YAML directive.
dchar[] scanYAMLDirectiveNumber(const Mark startMark) @safe pure
{
    enforce(isDigit(reader_.peek()),
            new Error("While scanning a directive", startMark,
                      "expected a digit, but found: " ~
                      reader_.peek().to!string, reader_.mark));

    // The first digit was already verified by the enforce() above.
    uint digits = 1;
    while(isDigit(reader_.peek(digits))) { ++digits; }

    return reader_.get(digits);
}
|
|
|
|
|
2014-07-25 00:32:29 +00:00
|
|
|
/// Scan value of a tag directive.
|
|
|
|
/// Scan value of a tag directive: "HANDLE PREFIX", returned '\0'-separated.
dstring scanTagDirectiveValue(const Mark startMark) @safe pure
{
    findNextNonSpace();
    const handle = scanTagDirectiveHandle(startMark);
    findNextNonSpace();
    const prefix = scanTagDirectivePrefix(startMark);
    return handle ~ '\0' ~ prefix;
}
|
|
|
|
|
|
|
|
///Scan handle of a tag directive.
|
2014-07-25 00:32:29 +00:00
|
|
|
/// Scan handle of a tag directive.
dstring scanTagDirectiveHandle(const Mark startMark) @trusted pure
{
    // Build the handle in a Reader slice; finish() must run even on failure
    // to leave the slice builder in a consistent state.
    reader_.sliceBuilder.begin();
    {
        scope(failure) { reader_.sliceBuilder.finish(); }
        scanTagHandleToSlice!"directive"(startMark);
        throwIfError();
    }
    const handle = reader_.sliceBuilder.finish();

    // The handle must be followed by a space (the prefix comes next).
    enforce(reader_.peek() == ' ',
            new Error("While scanning a directive handle", startMark,
                      "expected ' ', but found: " ~ reader_.peek().to!string,
                      reader_.mark));
    return handle;
}
|
|
|
|
|
2014-07-25 00:34:53 +00:00
|
|
|
/// Scan prefix of a tag directive.
|
2014-07-25 00:51:06 +00:00
|
|
|
/// Scan prefix of a tag directive.
dstring scanTagDirectivePrefix(const Mark startMark) @trusted pure
{
    // Build the prefix in a Reader slice; finish() must run even on failure
    // to leave the slice builder in a consistent state.
    reader_.sliceBuilder.begin();
    {
        scope(failure) { reader_.sliceBuilder.finish(); }
        scanTagURIToSlice!"directive"(startMark);
        throwIfError();
    }
    auto value = reader_.sliceBuilder.finish();

    // The prefix must be followed by whitespace, EOF or a line break.
    // Message fixed: it previously lacked the ": " separator before the
    // offending character and only mentioned ' ' as acceptable.
    enforce(" \0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek()),
            new Error("While scanning a directive prefix", startMark,
                      "expected ' ' or a line break, but found: "
                      ~ reader_.peek().to!string,
                      reader_.mark));

    return value;
}
|
|
|
|
|
2014-07-22 00:12:18 +00:00
|
|
|
///Scan (and ignore) ignored line after a directive.
|
2014-07-24 01:50:39 +00:00
|
|
|
/// Scan (and ignore) ignored line after a directive.
void scanDirectiveIgnoredLine(const Mark startMark) @safe pure
{
    findNextNonSpace();
    // An inline comment runs to the end of the line.
    if(reader_.peek() == '#') { scanToNextBreak(); }
    // Nothing but a comment or a line break may follow a directive.
    // Message fixed: it previously lacked the separator before the offending
    // character ("foundX" instead of "found: X").
    enforce("\0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek()),
            new Error("While scanning a directive", startMark,
                      "expected comment or a line break, but found: "
                      ~ to!string(reader_.peek()), reader_.mark));
    scanLineBreak();
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Scan an alias or an anchor.
|
|
|
|
*
|
|
|
|
* The specification does not restrict characters for anchors and
|
|
|
|
* aliases. This may lead to problems, for instance, the document:
|
|
|
|
* [ *alias, value ]
|
|
|
|
* can be interpteted in two ways, as
|
|
|
|
* [ "value" ]
|
|
|
|
* and
|
|
|
|
* [ *alias , "value" ]
|
|
|
|
* Therefore we restrict aliases to ASCII alphanumeric characters.
|
|
|
|
*/
|
2014-07-24 01:50:39 +00:00
|
|
|
/// Scan an alias ('*name') or an anchor ('&name') token.
///
/// Aliases/anchors are restricted to ASCII alphanumeric characters (plus
/// '-'/'_') to avoid ambiguity in flow collections.
Token scanAnchor(TokenID id) @safe pure
{
    const startMark = reader_.mark;

    // '*' introduces an alias, '&' an anchor.
    const dchar i = reader_.get();

    dchar[] value = i == '*' ? scanAlphaNumeric!("an alias")(startMark)
                             : scanAlphaNumeric!("an anchor")(startMark);

    enforce((" \t\0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek()) ||
             ("?:,]}%@").canFind(reader_.peek())),
            // The ternary must be parenthesized: '~' binds tighter than '?:',
            // so without parentheses the condition would apply to the
            // concatenation result and the message would always say "alias".
            new Error("While scanning an " ~ (i == '*' ? "alias" : "anchor"),
                      startMark, "expected alphanumeric, - or _, but found "~
                      to!string(reader_.peek()), reader_.mark));

    if(id == TokenID.Alias)
    {
        return aliasToken(startMark, reader_.mark, value.utf32To8);
    }
    else if(id == TokenID.Anchor)
    {
        return anchorToken(startMark, reader_.mark, value.utf32To8);
    }
    assert(false, "This code should never be reached");
}
|
|
|
|
|
2014-07-25 00:34:53 +00:00
|
|
|
/// Scan a tag token.
|
2014-07-25 16:28:08 +00:00
|
|
|
///
|
|
|
|
/// In case of an error, error_ is set. Use throwIfError() to handle this.
|
|
|
|
Token scanTag() @trusted pure nothrow
{
    const startMark = reader_.mark;
    // Character after the leading '!' decides the tag form.
    dchar c = reader_.peek(1);

    reader_.sliceBuilder.begin();
    // Ensure the slice builder is closed even if scanning throws.
    scope(failure) { reader_.sliceBuilder.finish(); }
    // Index where tag handle ends and tag suffix starts in the tag value
    // (slice) we will produce.
    uint handleEnd;

    if(c == '<')
    {
        // Verbatim tag: !<URI> — no handle, scan the URI up to '>'.
        reader_.forward(2);

        handleEnd = 0;
        scanTagURIToSlice!"tag"(startMark);
        if(error_) { return Token.init; }
        if(reader_.peek() != '>')
        {
            setError("While scanning a tag", startMark,
                     buildMsg("expected '>' but found ", reader_.peek()),
                     reader_.mark);
            return Token.init;
        }
        reader_.forward();
    }
    else if(" \t\0\n\r\u0085\u2028\u2029"d.canFind(c))
    {
        // A lone '!' (non-specific tag): value is just "!", no suffix.
        reader_.forward();
        handleEnd = 0;
        reader_.sliceBuilder.write('!');
    }
    else
    {
        // Shorthand tag: either !handle!suffix or !suffix.
        uint length = 1;
        bool useHandle = false;

        // Look ahead for a second '!' that would terminate a named handle.
        while(!" \0\n\r\u0085\u2028\u2029"d.canFind(c))
        {
            if(c == '!')
            {
                useHandle = true;
                break;
            }
            ++length;
            c = reader_.peek(length);
        }

        if(useHandle)
        {
            // !handle!suffix — scan the handle into the slice first.
            scanTagHandleToSlice!"tag"(startMark);
            handleEnd = cast(uint)reader_.sliceBuilder.length;
            if(error_) { return Token.init; }
        }
        else
        {
            // !suffix — the handle is the primary handle "!".
            reader_.forward();
            reader_.sliceBuilder.write('!');
            handleEnd = cast(uint)reader_.sliceBuilder.length;
        }

        scanTagURIToSlice!"tag"(startMark);
        if(error_) { return Token.init; }
    }

    // The tag must be terminated by whitespace, EOF or a line break.
    if(!" \0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek()))
    {
        setError("While scanning a tag", startMark,
                 buildMsg("expected ' ' but found ", reader_.peek()),
                 reader_.mark);
        return Token.init;
    }
    const slice = reader_.sliceBuilder.finish();
    // handleEnd tells the parser where handle ends and suffix starts in slice.
    return tagToken(startMark, reader_.mark, slice.utf32To8, handleEnd);
}
|
|
|
|
|
2014-07-26 00:29:27 +00:00
|
|
|
/// Scan a block scalar token with specified style.
|
2014-07-26 00:53:05 +00:00
|
|
|
///
|
|
|
|
/// In case of an error, error_ is set. Use throwIfError() to handle this.
|
|
|
|
Token scanBlockScalar(const ScalarStyle style) @trusted pure nothrow
{
    const startMark = reader_.mark;

    // Scan the header: skip '|' or '>', then chomping/indentation indicators.
    reader_.forward();

    const indicators = scanBlockScalarIndicators(startMark);
    if(error_) { return Token.init; }

    const chomping   = indicators[0];
    const increment  = indicators[1];
    scanBlockScalarIgnoredLine(startMark);
    if(error_) { return Token.init; }

    // Determine the indentation level and go to the first non-empty line.
    Mark endMark;
    uint indent = max(1, indent_ + 1);

    reader_.sliceBuilder.begin();
    alias Transaction = SliceBuilder.Transaction;
    // Used to strip the last line breaks written to the slice at the end of the
    // scalar, which may be needed based on chomping.
    Transaction breaksTransaction = Transaction(reader_.sliceBuilder);
    // Read the first indentation/line breaks before the scalar.
    size_t startLen = reader_.sliceBuilder.length;
    if(increment == int.min)
    {
        // No explicit indentation indicator: detect it from the content.
        auto indentation = scanBlockScalarIndentationToSlice();
        endMark = indentation[1];
        indent  = max(indent, indentation[0]);
    }
    else
    {
        // Explicit indicator: indent is relative to the parent's indent.
        indent += increment - 1;
        endMark = scanBlockScalarBreaksToSlice(indent);
    }
    size_t endLen = reader_.sliceBuilder.length;

    // dchar.max means there's no line break.
    dchar lineBreak = dchar.max;

    // Scan the inner part of the block scalar.
    while(reader_.column == indent && reader_.peek() != '\0')
    {
        breaksTransaction.commit();
        const bool leadingNonSpace = !" \t"d.canFind(reader_.peek());
        // This is where the 'interesting' non-whitespace data gets read.
        scanToNextBreakToSlice();
        lineBreak = scanLineBreak();

        // This transaction serves to rollback data read in the
        // scanBlockScalarBreaksToSlice() call.
        breaksTransaction = Transaction(reader_.sliceBuilder);
        startLen = reader_.sliceBuilder.length;
        // The line breaks should actually be written _after_ the if() block
        // below; we work around that by inserting them into the middle of the
        // slice with insertBack() where needed.
        endMark = scanBlockScalarBreaksToSlice(indent);
        endLen = reader_.sliceBuilder.length;

        // This will not run during the last iteration (see the if() vs the
        // while()), hence breaksTransaction rollback (which happens after this
        // loop) will never roll back data written in this if() block.
        if(reader_.column == indent && reader_.peek() != '\0')
        {
            // Unfortunately, folding rules are ambiguous.

            // This is the folding according to the specification:
            if(style == ScalarStyle.Folded && lineBreak == '\n' &&
               leadingNonSpace && !" \t"d.canFind(reader_.peek()))
            {
                // No breaks were scanned; no need to insert the space in the
                // middle of slice.
                if(startLen == endLen)
                {
                    reader_.sliceBuilder.write(' ');
                }
            }
            else
            {
                // We need to insert in the middle of the slice in case any line
                // breaks were scanned.
                reader_.sliceBuilder.insertBack(lineBreak, endLen - startLen);
            }

            ////this is Clark Evans's interpretation (also in the spec
            ////examples):
            //
            //if(style == ScalarStyle.Folded && lineBreak == '\n')
            //{
            //    if(startLen == endLen)
            //    {
            //        if(!" \t"d.canFind(reader_.peek()))
            //        {
            //            reader_.sliceBuilder.write(' ');
            //        }
            //        else
            //        {
            //            chunks ~= lineBreak;
            //        }
            //    }
            //}
            //else
            //{
            //    reader_.sliceBuilder.insertBack(lineBreak, endLen - startLen);
            //}
        }
        else
        {
            break;
        }
    }

    // If chomping is Keep, we keep (commit) the last scanned line breaks
    // (which are at the end of the scalar). Otherwise we remove them (end the
    // transaction).
    if(chomping == Chomping.Keep) { breaksTransaction.commit(); }
    else                          { breaksTransaction.__dtor(); }
    if(chomping != Chomping.Strip && lineBreak != dchar.max)
    {
        // If chomping is Keep, we keep the line break but the first line break
        // that isn't stripped (since chomping isn't Strip in this branch) must
        // be inserted _before_ the other line breaks.
        if(chomping == Chomping.Keep)
        {
            reader_.sliceBuilder.insertBack(lineBreak, endLen - startLen);
        }
        // If chomping is not Keep, breaksTransaction was cancelled so we can
        // directly write the first line break (as it isn't stripped - chomping
        // is not Strip)
        else
        {
            reader_.sliceBuilder.write(lineBreak);
        }
    }

    const slice = reader_.sliceBuilder.finish();

    return scalarToken(startMark, endMark, slice.utf32To8, style);
}
|
|
|
|
|
2014-07-25 19:12:33 +00:00
|
|
|
/// Scan chomping and indentation indicators of a scalar token.
|
|
|
|
///
|
|
|
|
/// In case of an error, error_ is set. Use throwIfError() to handle this.
|
2014-07-24 01:50:39 +00:00
|
|
|
Tuple!(Chomping, int) scanBlockScalarIndicators(const Mark startMark)
    @safe pure nothrow @nogc
{
    // Defaults: clip chomping, auto-detected indentation (int.min sentinel).
    auto chomping = Chomping.Clip;
    int increment = int.min;
    dchar c       = reader_.peek();

    /// Indicators can be in any order.
    if(getChomping(c, chomping))
    {
        getIncrement(c, increment, startMark);
        if(error_) { return tuple(Chomping.init, int.max); }
    }
    else
    {
        // No chomping indicator first; try increment, then chomping after it.
        const gotIncrement = getIncrement(c, increment, startMark);
        if(error_) { return tuple(Chomping.init, int.max); }
        if(gotIncrement) { getChomping(c, chomping); }
    }

    // The indicators must be followed by whitespace, EOF or a line break.
    if(!" \0\n\r\u0085\u2028\u2029"d.canFind(c))
    {
        setError("While scanning a block scalar", startMark,
                 buildMsg("expected chomping or indentation indicator, but found ", c),
                 reader_.mark);
        return tuple(Chomping.init, int.max);
    }

    return tuple(chomping, increment);
}
|
2011-08-16 12:53:13 +00:00
|
|
|
|
2014-07-25 19:12:33 +00:00
|
|
|
/// Get chomping indicator, if detected. Return false otherwise.
|
|
|
|
///
|
|
|
|
/// Used in scanBlockScalarIndicators.
|
|
|
|
///
|
|
|
|
/// Params:
|
|
|
|
///
|
|
|
|
/// c = The character that may be a chomping indicator.
|
|
|
|
/// chomping = Write the chomping value here, if detected.
|
|
|
|
/// Get chomping indicator, if detected. Return false otherwise.
///
/// Used in scanBlockScalarIndicators.
///
/// Params:
///
/// c        = The character that may be a chomping indicator. If one is
///            detected, updated to the next character in the Reader.
/// chomping = Write the chomping value here, if detected.
bool getChomping(ref dchar c, ref Chomping chomping) @safe pure nothrow @nogc
{
    switch(c)
    {
        case '+': chomping = Chomping.Keep;  break;
        case '-': chomping = Chomping.Strip; break;
        default:  return false;
    }
    reader_.forward();
    c = reader_.peek();
    return true;
}
|
2011-08-16 12:53:13 +00:00
|
|
|
|
2014-07-25 19:12:33 +00:00
|
|
|
/// Get increment indicator, if detected. Return false otherwise.
|
|
|
|
///
|
|
|
|
/// Used in scanBlockScalarIndicators.
|
|
|
|
///
|
|
|
|
/// Params:
|
|
|
|
///
|
|
|
|
/// c = The character that may be an increment indicator.
|
|
|
|
/// If an increment indicator is detected, this will be updated to
|
|
|
|
/// the next character in the Reader.
|
|
|
|
/// increment = Write the increment value here, if detected.
|
|
|
|
/// startMark = Mark for error messages.
|
2014-07-25 19:22:44 +00:00
|
|
|
///
|
|
|
|
/// In case of an error, error_ is set. Use throwIfError() to handle this.
|
2014-07-25 19:12:33 +00:00
|
|
|
/// Get increment (indentation) indicator, if detected. Return false otherwise.
///
/// Used in scanBlockScalarIndicators.
///
/// Params:
///
/// c         = The character that may be an increment indicator. If one is
///             detected, updated to the next character in the Reader.
/// increment = Write the increment value here, if detected.
/// startMark = Mark for error messages.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
bool getIncrement(ref dchar c, ref int increment, const Mark startMark)
    @safe pure nothrow @nogc
{
    if(!c.isDigit) { return false; }

    // A single-digit indentation indicator; '0' is invalid.
    increment = c - '0';
    assert(increment >= 0 && increment < 10, "Digit has invalid value");
    if(increment == 0)
    {
        setError("While scanning a block scalar", startMark,
                 "expected indentation indicator in range 1-9, but found 0",
                 reader_.mark);
        return false;
    }

    reader_.forward();
    c = reader_.peek();
    return true;
}
|
|
|
|
|
2014-07-25 19:22:44 +00:00
|
|
|
/// Scan (and ignore) ignored line in a block scalar.
|
|
|
|
///
|
|
|
|
/// In case of an error, error_ is set. Use throwIfError() to handle this.
|
|
|
|
/// Scan (and ignore) ignored line in a block scalar.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
void scanBlockScalarIgnoredLine(const Mark startMark) @safe pure nothrow @nogc
{
    findNextNonSpace();
    // An inline comment runs to the end of the line.
    if(reader_.peek() == '#') { scanToNextBreak(); }

    // Only a comment or a line break may follow the block scalar header.
    if(!"\0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek()))
    {
        setError("While scanning a block scalar", startMark,
                 buildMsg("expected comment or line break, but found ", reader_.peek()),
                 reader_.mark);
        return;
    }
    scanLineBreak();
}
|
|
|
|
|
2014-07-25 21:40:18 +00:00
|
|
|
/// Scan indentation in a block scalar, returning line breaks, max indent and end mark.
|
2014-07-25 21:45:47 +00:00
|
|
|
///
|
|
|
|
/// Assumes that the caller is building a slice in Reader, and puts the scanned
|
|
|
|
/// characters into that slice.
|
|
|
|
Tuple!(uint, Mark) scanBlockScalarIndentationToSlice()
|
2014-07-25 21:40:18 +00:00
|
|
|
@system pure nothrow @nogc
|
2011-08-16 12:53:13 +00:00
|
|
|
{
|
|
|
|
uint maxIndent;
|
|
|
|
Mark endMark = reader_.mark;
|
|
|
|
|
2011-10-23 22:46:35 +00:00
|
|
|
while(" \n\r\u0085\u2028\u2029"d.canFind(reader_.peek()))
|
2011-08-16 12:53:13 +00:00
|
|
|
{
|
|
|
|
if(reader_.peek() != ' ')
|
|
|
|
{
|
2014-07-25 21:40:18 +00:00
|
|
|
reader_.sliceBuilder.write(scanLineBreak());
|
2011-08-16 12:53:13 +00:00
|
|
|
endMark = reader_.mark;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
reader_.forward();
|
|
|
|
maxIndent = max(reader_.column, maxIndent);
|
|
|
|
}
|
|
|
|
|
2014-07-25 21:45:47 +00:00
|
|
|
return tuple(maxIndent, endMark);
|
2011-08-16 12:53:13 +00:00
|
|
|
}
|
|
|
|
|
2014-07-25 21:48:28 +00:00
|
|
|
/// Scan line breaks at lower or specified indentation in a block scalar.
|
|
|
|
///
|
|
|
|
/// Assumes that the caller is building a slice in Reader, and puts the scanned
|
|
|
|
/// characters into that slice.
|
|
|
|
Mark scanBlockScalarBreaksToSlice(const uint indent) @trusted pure nothrow @nogc
|
|
|
|
{
|
|
|
|
Mark endMark = reader_.mark;
|
|
|
|
|
|
|
|
for(;;)
|
|
|
|
{
|
|
|
|
while(reader_.column < indent && reader_.peek() == ' ') { reader_.forward(); }
|
|
|
|
if(!"\n\r\u0085\u2028\u2029"d.canFind(reader_.peek())) { break; }
|
|
|
|
reader_.sliceBuilder.write(scanLineBreak());
|
|
|
|
endMark = reader_.mark;
|
|
|
|
}
|
|
|
|
|
|
|
|
return endMark;
|
|
|
|
}
|
|
|
|
|
2014-07-23 01:09:48 +00:00
|
|
|
/// Scan a qouted flow scalar token with specified quotes.
|
2014-07-24 21:21:59 +00:00
|
|
|
///
|
2014-07-25 14:54:06 +00:00
|
|
|
/// In case of an error, error_ is set. Use throwIfError() to handle this.
|
2014-07-24 21:21:59 +00:00
|
|
|
Token scanFlowScalar(const ScalarStyle quotes) @trusted pure nothrow
|
2011-08-16 12:53:13 +00:00
|
|
|
{
|
|
|
|
const startMark = reader_.mark;
|
2014-07-24 01:50:39 +00:00
|
|
|
const quote = reader_.get();
|
2011-08-16 12:53:13 +00:00
|
|
|
|
2014-07-24 16:46:21 +00:00
|
|
|
reader_.sliceBuilder.begin();
|
2014-07-24 21:21:59 +00:00
|
|
|
scope(exit) if(error_) { reader_.sliceBuilder.finish(); }
|
2011-10-23 18:17:37 +00:00
|
|
|
|
2014-07-24 16:46:21 +00:00
|
|
|
scanFlowScalarNonSpacesToSlice(quotes, startMark);
|
2014-07-24 21:21:59 +00:00
|
|
|
if(error_) { return Token.init; }
|
|
|
|
|
2011-08-16 12:53:13 +00:00
|
|
|
while(reader_.peek() != quote)
|
|
|
|
{
|
2014-07-24 16:46:21 +00:00
|
|
|
scanFlowScalarSpacesToSlice(startMark);
|
2014-07-24 21:21:59 +00:00
|
|
|
if(error_) { return Token.init; }
|
2014-07-24 16:46:21 +00:00
|
|
|
scanFlowScalarNonSpacesToSlice(quotes, startMark);
|
2014-07-24 21:21:59 +00:00
|
|
|
if(error_) { return Token.init; }
|
2011-08-16 12:53:13 +00:00
|
|
|
}
|
|
|
|
reader_.forward();
|
|
|
|
|
2014-07-24 16:46:21 +00:00
|
|
|
auto slice = reader_.sliceBuilder.finish();
|
|
|
|
return scalarToken(startMark, reader_.mark, slice.utf32To8, quotes);
|
2011-08-16 12:53:13 +00:00
|
|
|
}
|
|
|
|
|
2014-07-24 16:46:21 +00:00
|
|
|
    /// Scan nonspace characters in a flow scalar.
    ///
    /// Assumes that the caller is building a slice in Reader, and puts the scanned
    /// characters into that slice.
    ///
    /// Params:  quotes    = Quoting style of the scalar being scanned; determines
    ///                      which quote/escape handling branches apply.
    ///          startMark = Mark of the start of the scalar, for error messages.
    ///
    /// In case of an error, error_ is set. Use throwIfError() to handle this.
    void scanFlowScalarNonSpacesToSlice(const ScalarStyle quotes, const Mark startMark)
        @system pure nothrow @nogc
    {
        for(;;) with(ScalarStyle)
        {
            dchar c = reader_.peek();

            // Characters that end a run of ordinary (non-special) characters:
            // whitespace, line breaks, NUL, quotes and backslash.
            mixin FastCharSearch!" \t\0\n\r\u0085\u2028\u2029\'\"\\"d search;

            size_t length = 0;
            // This is an optimized way of writing:
            // while(!search.canFind(reader_.peek(length))){++length;}
            outer: for(;;)
            {
                // Examine the input in chunks of up to 32 characters.
                const slice = reader_.slice(length, length + 32);
                if(slice.empty)
                {
                    // Ran out of input before finding a terminator.
                    setError("While reading a flow scalar", startMark,
                             "reached end of file", reader_.mark);
                    return;
                }
                foreach(ch; slice)
                {
                    if(search.canFind(ch)) { break outer; }
                    ++length;
                }
            }

            // Copy the whole run of ordinary characters into the slice at once.
            reader_.sliceBuilder.write(reader_.get(length));

            c = reader_.peek();
            // '' inside a single-quoted scalar is an escaped single quote.
            if(quotes == SingleQuoted && c == '\'' && reader_.peek(1) == '\'')
            {
                reader_.forward(2);
                reader_.sliceBuilder.write('\'');
            }
            // Quote/backslash characters that are literal in this quoting style.
            else if((quotes == DoubleQuoted && c == '\'') ||
                    (quotes == SingleQuoted && "\"\\"d.canFind(c)))
            {
                reader_.forward();
                reader_.sliceBuilder.write(c);
            }
            // Backslash escapes are only recognized in double-quoted scalars.
            else if(quotes == DoubleQuoted && c == '\\')
            {
                reader_.forward();
                c = reader_.peek();
                if(dyaml.escapes.escapes.canFind(c))
                {
                    // Simple one-character escape, e.g. \n, \t.
                    reader_.forward();
                    reader_.sliceBuilder.write(dyaml.escapes.fromEscape(c));
                }
                else if(dyaml.escapes.escapeHexCodeList.canFind(c))
                {
                    // Escape introducing a fixed-length run of hex digits.
                    const hexLength = dyaml.escapes.escapeHexLength(c);
                    reader_.forward();

                    // All of the digits must be hexadecimal.
                    foreach(i; 0 .. hexLength) if(!reader_.peek(i).isHexDigit())
                    {
                        setError("While scanning a double quoted scalar", startMark,
                                 "found an unexpected character; expected escape "
                                 "sequence of hexadecimal numbers.", reader_.mark);
                        return;
                    }

                    dchar[] hex = reader_.get(hexLength);
                    bool overflow;
                    const decoded = cast(dchar)parseNoGC!int(hex, 16u, overflow);
                    if(overflow)
                    {
                        setError("While scanning a double quoted scalar", startMark,
                                 "overflow when parsing an escape sequence of "
                                 "hexadecimal numbers.", reader_.mark);
                        return;
                    }
                    reader_.sliceBuilder.write(decoded);
                }
                else if("\n\r\u0085\u2028\u2029"d.canFind(c))
                {
                    // Escaped line break: fold it away and consume further breaks.
                    scanLineBreak();
                    scanFlowScalarBreaksToSlice(startMark);
                    if(error_) { return; }
                }
                else
                {
                    setError("While scanning a double quoted scalar", startMark,
                             buildMsg("found unsupported escape " "character", c),
                             reader_.mark);
                    return;
                }
            }
            // Whitespace, line break or closing quote: caller takes over.
            else { return; }
        }
    }
|
|
|
|
|
2014-07-23 00:17:19 +00:00
|
|
|
/// Scan space characters in a flow scalar.
|
2014-07-24 16:46:21 +00:00
|
|
|
///
|
|
|
|
/// Assumes that the caller is building a slice in Reader, and puts the scanned
|
|
|
|
/// spaces into that slice.
|
2014-07-24 21:20:31 +00:00
|
|
|
///
|
2014-07-25 14:54:06 +00:00
|
|
|
/// In case of an error, error_ is set. Use throwIfError() to handle this.
|
2014-07-24 16:49:34 +00:00
|
|
|
void scanFlowScalarSpacesToSlice(const Mark startMark)
|
|
|
|
@system pure nothrow @nogc
|
2011-08-16 12:53:13 +00:00
|
|
|
{
|
2014-07-23 00:17:19 +00:00
|
|
|
// Increase length as long as we see whitespace.
|
2014-07-24 16:46:21 +00:00
|
|
|
size_t length = 0;
|
2014-07-23 00:17:19 +00:00
|
|
|
while(" \t"d.canFind(reader_.peek(length))) { ++length; }
|
2014-07-24 16:46:21 +00:00
|
|
|
auto whitespaces = reader_.prefix(length + 1);
|
2011-08-16 12:53:13 +00:00
|
|
|
|
2011-10-29 16:21:44 +00:00
|
|
|
const c = whitespaces[$ - 1];
|
2014-07-24 16:49:34 +00:00
|
|
|
if(c == '\0')
|
|
|
|
{
|
|
|
|
setError("While scanning a quoted scalar", startMark,
|
|
|
|
"found unexpected end of buffer", reader_.mark);
|
|
|
|
return;
|
|
|
|
}
|
2011-08-16 12:53:13 +00:00
|
|
|
|
2014-07-24 16:46:21 +00:00
|
|
|
// Spaces not followed by a line break.
|
|
|
|
if(!"\n\r\u0085\u2028\u2029"d.canFind(c))
|
2011-10-29 16:21:44 +00:00
|
|
|
{
|
|
|
|
reader_.forward(length);
|
2014-07-24 16:46:21 +00:00
|
|
|
reader_.sliceBuilder.write(whitespaces[0 .. $ - 1]);
|
|
|
|
return;
|
2011-10-29 16:21:44 +00:00
|
|
|
}
|
2014-07-24 16:46:21 +00:00
|
|
|
|
|
|
|
// There's a line break after the spaces.
|
|
|
|
reader_.forward(length);
|
|
|
|
const lineBreak = scanLineBreak();
|
|
|
|
|
|
|
|
if(lineBreak != '\n') { reader_.sliceBuilder.write(lineBreak); }
|
|
|
|
|
|
|
|
// If we have extra line breaks after the first, scan them into the
|
|
|
|
// slice.
|
|
|
|
const bool extraBreaks = scanFlowScalarBreaksToSlice(startMark);
|
2014-07-24 16:49:34 +00:00
|
|
|
if(error_) { return; }
|
|
|
|
|
2014-07-24 16:46:21 +00:00
|
|
|
// No extra breaks, one normal line break. Replace it with a space.
|
|
|
|
if(lineBreak == '\n' && !extraBreaks) { reader_.sliceBuilder.write(' '); }
|
2011-08-16 12:53:13 +00:00
|
|
|
}
|
|
|
|
|
2014-07-23 00:17:19 +00:00
|
|
|
/// Scan line breaks in a flow scalar.
|
2014-07-24 16:46:21 +00:00
|
|
|
///
|
|
|
|
/// Assumes that the caller is building a slice in Reader, and puts the scanned
|
|
|
|
/// line breaks into that slice.
|
|
|
|
///
|
2014-07-25 14:54:06 +00:00
|
|
|
/// In case of an error, error_ is set. Use throwIfError() to handle this.
|
2014-07-24 16:46:21 +00:00
|
|
|
bool scanFlowScalarBreaksToSlice(const Mark startMark)
|
|
|
|
@system pure nothrow @nogc
|
2011-08-16 12:53:13 +00:00
|
|
|
{
|
2014-07-24 16:46:21 +00:00
|
|
|
// True if at least one line break was found.
|
|
|
|
bool anyBreaks;
|
2011-08-16 12:53:13 +00:00
|
|
|
for(;;)
|
|
|
|
{
|
2014-07-23 00:17:19 +00:00
|
|
|
// Instead of checking indentation, we check for document separators.
|
2011-08-16 12:53:13 +00:00
|
|
|
const prefix = reader_.prefix(3);
|
2014-07-22 00:12:18 +00:00
|
|
|
if((prefix == "---"d || prefix == "..."d) &&
|
2011-10-23 22:46:35 +00:00
|
|
|
" \t\0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek(3)))
|
2011-08-16 12:53:13 +00:00
|
|
|
{
|
2014-07-24 16:46:21 +00:00
|
|
|
setError("While scanning a quoted scalar", startMark,
|
|
|
|
"found unexpected document separator", reader_.mark);
|
|
|
|
return false;
|
2011-08-16 12:53:13 +00:00
|
|
|
}
|
|
|
|
|
2014-07-24 16:46:21 +00:00
|
|
|
// Skip any whitespaces.
|
2014-07-23 00:17:19 +00:00
|
|
|
while(" \t"d.canFind(reader_.peek())) { reader_.forward(); }
|
2011-08-16 12:53:13 +00:00
|
|
|
|
2014-07-24 16:46:21 +00:00
|
|
|
// Encountered a non-whitespace non-linebreak character, so we're done.
|
|
|
|
if(!"\n\r\u0085\u2028\u2029"d.canFind(reader_.peek())) { break; }
|
|
|
|
|
|
|
|
const lineBreak = scanLineBreak();
|
|
|
|
anyBreaks = true;
|
|
|
|
reader_.sliceBuilder.write(lineBreak);
|
2011-08-16 12:53:13 +00:00
|
|
|
}
|
2014-07-24 16:46:21 +00:00
|
|
|
return anyBreaks;
|
2011-08-16 12:53:13 +00:00
|
|
|
}
|
|
|
|
|
2014-07-23 00:47:46 +00:00
|
|
|
    /// Scan plain scalar token (no block, no quotes).
    ///
    /// Returns: A SCALAR token with Plain style covering the scanned text.
    ///
    /// In case of an error, error_ is set. Use throwIfError() to handle this.
    Token scanPlain() @trusted pure nothrow
    {
        // We keep track of the allowSimpleKey_ flag here.
        // Indentation rules are loosed for the flow context
        const startMark = reader_.mark;
        Mark endMark = startMark;
        const indent = indent_ + 1;

        // We allow zero indentation for scalars, but then we need to check for
        // document separators at the beginning of the line.
        // if(indent == 0) { indent = 1; }

        // Characters that terminate a run of plain-scalar characters.
        mixin FastCharSearch!" \t\0\n\r\u0085\u2028\u2029"d search;

        reader_.sliceBuilder.begin();

        // Trailing spaces between runs are written inside a transaction so they
        // can be discarded if the scalar ends before more content appears.
        alias Transaction = SliceBuilder.Transaction;
        Transaction spacesTransaction;
        // Stop at a comment.
        while(reader_.peek() != '#')
        {
            // Scan the entire plain scalar.
            uint length = 0;
            dchar c;
            for(;;)
            {
                c = reader_.peek(length);
                // ':' only terminates the scalar when followed by whitespace
                // (block context) or when flow indicators are significant.
                const bool done = search.canFind(c) || (flowLevel_ == 0 && c == ':' &&
                                  search.canFind(reader_.peek(length + 1))) ||
                                  (flowLevel_ > 0 && ",:?[]{}"d.canFind(c));
                if(done) { break; }
                ++length;
            }

            // It's not clear what we should do with ':' in the flow context.
            if(flowLevel_ > 0 && c == ':' &&
               !search.canFind(reader_.peek(length + 1)) &&
               !",[]{}"d.canFind(reader_.peek(length + 1)))
            {
                // This is an error; throw the slice away.
                spacesTransaction.commit();
                reader_.sliceBuilder.finish();
                reader_.forward(length);
                setError("While scanning a plain scalar", startMark,
                         "found unexpected ':' . Please check "
                         "http://pyyaml.org/wiki/YAMLColonInFlowContext "
                         "for details.", reader_.mark);
                return Token.init;
            }

            // Nothing scanned this iteration: the scalar has ended.
            if(length == 0) { break; }

            allowSimpleKey_ = false;

            reader_.sliceBuilder.write(reader_.get(length));

            endMark = reader_.mark;

            // Keep the previous run's spaces (content followed them) and open a
            // new transaction for the spaces after this run.
            spacesTransaction.commit();
            spacesTransaction = Transaction(reader_.sliceBuilder);

            const startLength = reader_.sliceBuilder.length;
            scanPlainSpacesToSlice(startMark);
            // End the scalar if no spaces/breaks were scanned, or (in block
            // context) if the next line is not indented enough.
            if(startLength == reader_.sliceBuilder.length ||
               (flowLevel_ == 0 && reader_.column < indent))
            {
                break;
            }
        }

        // Roll back any trailing spaces that were not followed by content.
        spacesTransaction.__dtor();
        const slice = reader_.sliceBuilder.finish();

        return scalarToken(startMark, endMark, slice.utf32To8, ScalarStyle.Plain);
    }
|
|
|
|
|
2014-07-23 00:17:19 +00:00
|
|
|
    /// Scan spaces in a plain scalar.
    ///
    /// Assumes that the caller is building a slice in Reader, and puts the spaces
    /// into that slice.
    ///
    /// Params:  startMark = Mark of the start of the scalar (currently unused here,
    ///                      kept for interface symmetry with the other scanners).
    void scanPlainSpacesToSlice(const Mark startMark) @system pure nothrow @nogc
    {
        // The specification is really confusing about tabs in plain scalars.
        // We just forbid them completely. Do not use tabs in YAML!

        // Get as many plain spaces as there are.
        size_t length = 0;
        while(reader_.peek(length) == ' ') { ++length; }
        dchar[] whitespaces = reader_.get(length);

        dchar c = reader_.peek();
        // No newline after the spaces (if any)
        if(!"\n\r\u0085\u2028\u2029"d.canFind(c))
        {
            // We have spaces, but no newline.
            if(whitespaces.length > 0) { reader_.sliceBuilder.write(whitespaces); }
            return;
        }

        // Newline after the spaces (if any)
        const lineBreak = scanLineBreak();
        // A line break in a plain scalar permits a simple key on the next line.
        allowSimpleKey_ = true;

        // True at a document separator ("---" or "...") at the start of a line.
        static bool end(Reader reader_) @safe pure nothrow @nogc
        {
            return ("---"d == reader_.prefix(3) || "..."d == reader_.prefix(3))
                   && " \t\0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek(3));
        }

        if(end(reader_)) { return; }

        bool extraBreaks = false;

        // Breaks are written inside a transaction so they are discarded if a
        // document separator ends the scalar (early return skips commit()).
        alias Transaction = SliceBuilder.Transaction;
        auto transaction = Transaction(reader_.sliceBuilder);
        if(lineBreak != '\n') { reader_.sliceBuilder.write(lineBreak); }
        while(" \n\r\u0085\u2028\u2029"d.canFind(reader_.peek()))
        {
            if(reader_.peek() == ' ') { reader_.forward(); }
            else
            {
                const lBreak = scanLineBreak();
                extraBreaks = true;
                reader_.sliceBuilder.write(lBreak);

                if(end(reader_)) { return; }
            }
        }
        transaction.commit();

        // No line breaks, only a space.
        if(lineBreak == '\n' && !extraBreaks) { reader_.sliceBuilder.write(' '); }
    }
|
|
|
|
|
2014-07-23 00:47:46 +00:00
|
|
|
/// Scan handle of a tag token.
|
2014-07-25 00:32:29 +00:00
|
|
|
///
|
|
|
|
/// Assumes that the caller is building a slice in Reader, and puts the scanned
|
|
|
|
/// characters into that slice.
|
2014-07-25 14:55:15 +00:00
|
|
|
///
|
|
|
|
/// In case of an error, error_ is set. Use throwIfError() to handle this.
|
2014-07-25 19:15:14 +00:00
|
|
|
void scanTagHandleToSlice(string name)(const Mark startMark)
|
2014-07-25 14:55:15 +00:00
|
|
|
@system pure nothrow @nogc
|
2011-08-16 12:53:13 +00:00
|
|
|
{
|
|
|
|
dchar c = reader_.peek();
|
2014-07-25 14:55:15 +00:00
|
|
|
enum contextMsg = "While scanning a " ~ name;
|
|
|
|
if(c != '!')
|
|
|
|
{
|
2014-07-25 19:15:14 +00:00
|
|
|
setError(contextMsg, startMark,
|
2014-07-25 16:27:21 +00:00
|
|
|
buildMsg("expected a '!', but found: ", c), reader_.mark);
|
2014-07-25 14:55:15 +00:00
|
|
|
return;
|
|
|
|
}
|
2011-08-16 12:53:13 +00:00
|
|
|
|
|
|
|
uint length = 1;
|
|
|
|
c = reader_.peek(length);
|
|
|
|
if(c != ' ')
|
|
|
|
{
|
2014-07-25 00:32:29 +00:00
|
|
|
while(c.isAlphaNum || "-_"d.canFind(c))
|
2011-08-16 12:53:13 +00:00
|
|
|
{
|
|
|
|
++length;
|
|
|
|
c = reader_.peek(length);
|
|
|
|
}
|
|
|
|
if(c != '!')
|
|
|
|
{
|
|
|
|
reader_.forward(length);
|
2014-07-25 19:15:14 +00:00
|
|
|
setError(contextMsg, startMark,
|
2014-07-25 16:27:21 +00:00
|
|
|
buildMsg("expected a '!', but found: ", c), reader_.mark);
|
2014-07-25 14:55:15 +00:00
|
|
|
return;
|
2011-08-16 12:53:13 +00:00
|
|
|
}
|
|
|
|
++length;
|
|
|
|
}
|
2014-07-25 00:32:29 +00:00
|
|
|
|
|
|
|
reader_.sliceBuilder.write(reader_.get(length));
|
2011-08-16 12:53:13 +00:00
|
|
|
}
|
|
|
|
|
2014-07-23 00:47:46 +00:00
|
|
|
/// Scan URI in a tag token.
|
2014-07-25 00:51:06 +00:00
|
|
|
///
|
|
|
|
/// Assumes that the caller is building a slice in Reader, and puts the scanned
|
|
|
|
/// characters into that slice.
|
2014-07-25 15:33:54 +00:00
|
|
|
///
|
|
|
|
/// In case of an error, error_ is set. Use throwIfError() to handle this.
|
|
|
|
void scanTagURIToSlice(string name)(const Mark startMark) @trusted pure nothrow
|
2011-08-16 12:53:13 +00:00
|
|
|
{
|
2014-07-23 00:46:36 +00:00
|
|
|
// Note: we do not check if URI is well-formed.
|
2011-08-16 12:53:13 +00:00
|
|
|
dchar c = reader_.peek();
|
2014-07-26 00:49:52 +00:00
|
|
|
const startLen = reader_.sliceBuilder.length;
|
2011-08-16 12:53:13 +00:00
|
|
|
{
|
2014-07-25 00:34:53 +00:00
|
|
|
uint length = 0;
|
2014-07-25 15:33:54 +00:00
|
|
|
while(c.isAlphaNum || "-;/?:@&=+$,_.!~*\'()[]%"d.canFind(c))
|
2011-08-16 12:53:13 +00:00
|
|
|
{
|
2014-07-25 00:34:53 +00:00
|
|
|
if(c == '%')
|
|
|
|
{
|
|
|
|
auto chars = reader_.get(length);
|
|
|
|
reader_.sliceBuilder.write(chars);
|
|
|
|
length = 0;
|
2014-07-26 00:49:52 +00:00
|
|
|
scanURIEscapesToSlice!name(startMark);
|
2014-07-25 15:33:54 +00:00
|
|
|
if(error_) { return; }
|
2014-07-25 00:34:53 +00:00
|
|
|
}
|
|
|
|
else { ++length; }
|
|
|
|
c = reader_.peek(length);
|
|
|
|
}
|
|
|
|
if(length > 0)
|
|
|
|
{
|
|
|
|
auto chars = reader_.get(length);
|
|
|
|
reader_.sliceBuilder.write(chars);
|
2011-08-16 12:53:13 +00:00
|
|
|
length = 0;
|
|
|
|
}
|
|
|
|
}
|
2014-07-26 00:49:52 +00:00
|
|
|
// OK if we scanned something, error otherwise.
|
|
|
|
if(reader_.sliceBuilder.length > startLen) { return; }
|
2014-07-25 15:33:54 +00:00
|
|
|
|
|
|
|
enum contextMsg = "While parsing a " ~ name;
|
2014-07-25 16:27:21 +00:00
|
|
|
setError(contextMsg, startMark, buildMsg("expected URI, but found: ", c),
|
2014-07-25 15:33:54 +00:00
|
|
|
reader_.mark);
|
2011-08-16 12:53:13 +00:00
|
|
|
}
|
|
|
|
|
2014-07-25 15:33:54 +00:00
|
|
|
// Not @nogc yet because std.utf.decode is not @nogc
|
2014-07-23 00:47:46 +00:00
|
|
|
/// Scan URI escape sequences.
|
2014-07-25 00:49:33 +00:00
|
|
|
///
|
|
|
|
/// Assumes that the caller is building a slice in Reader, and puts the scanned
|
|
|
|
/// characters into that slice.
|
|
|
|
///
|
2014-07-25 15:33:54 +00:00
|
|
|
/// In case of an error, error_ is set. Use throwIfError() to handle this.
|
2014-07-26 00:49:52 +00:00
|
|
|
void scanURIEscapesToSlice(string name)(const Mark startMark)
|
2014-07-25 19:15:14 +00:00
|
|
|
@system pure nothrow // @nogc
|
2011-08-16 12:53:13 +00:00
|
|
|
{
|
2014-07-25 00:34:53 +00:00
|
|
|
// URI escapes encode a UTF-8 string. We store UTF-8 code units here for
|
|
|
|
// decoding into UTF-32.
|
|
|
|
char[4] bytes;
|
|
|
|
size_t bytesUsed;
|
2011-08-16 12:53:13 +00:00
|
|
|
Mark mark = reader_.mark;
|
|
|
|
|
2014-07-25 00:34:53 +00:00
|
|
|
// Get one dchar by decoding data from bytes.
|
|
|
|
//
|
|
|
|
// This is probably slow, but simple and URI escapes are extremely uncommon
|
|
|
|
// in YAML.
|
2014-07-26 00:49:52 +00:00
|
|
|
static size_t getDchar(char[] bytes, Reader reader_)
|
2011-08-16 12:53:13 +00:00
|
|
|
{
|
2014-07-25 00:34:53 +00:00
|
|
|
import std.utf;
|
|
|
|
size_t nextChar;
|
2014-07-25 00:49:33 +00:00
|
|
|
const c = std.utf.decode(bytes[], nextChar);
|
2014-07-25 00:34:53 +00:00
|
|
|
reader_.sliceBuilder.write(c);
|
|
|
|
if(bytes.length - nextChar > 0)
|
2011-08-16 12:53:13 +00:00
|
|
|
{
|
2014-07-25 00:49:33 +00:00
|
|
|
core.stdc.string.memmove(bytes.ptr, bytes.ptr + nextChar,
|
2014-07-25 00:34:53 +00:00
|
|
|
bytes.length - nextChar);
|
2011-08-16 12:53:13 +00:00
|
|
|
}
|
2014-07-25 00:34:53 +00:00
|
|
|
return bytes.length - nextChar;
|
2011-08-16 12:53:13 +00:00
|
|
|
}
|
|
|
|
|
2014-07-25 15:33:54 +00:00
|
|
|
enum contextMsg = "While scanning a " ~ name;
|
2014-07-25 00:49:33 +00:00
|
|
|
try
|
2011-08-16 12:53:13 +00:00
|
|
|
{
|
2014-07-25 00:34:53 +00:00
|
|
|
while(reader_.peek() == '%')
|
|
|
|
{
|
|
|
|
reader_.forward();
|
|
|
|
if(bytesUsed == bytes.length)
|
|
|
|
{
|
2014-07-26 00:49:52 +00:00
|
|
|
bytesUsed = getDchar(bytes[], reader_);
|
2014-07-25 00:34:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
char b = 0;
|
|
|
|
uint mult = 16;
|
|
|
|
// Converting 2 hexadecimal digits to a byte.
|
|
|
|
foreach(k; 0 .. 2)
|
|
|
|
{
|
|
|
|
const dchar c = reader_.peek(k);
|
2014-07-25 15:33:54 +00:00
|
|
|
if(!c.isHexDigit)
|
|
|
|
{
|
2014-07-25 16:27:21 +00:00
|
|
|
auto msg = buildMsg("expected URI escape sequence of 2 "
|
|
|
|
"hexadecimal numbers, but found: ", c);
|
|
|
|
setError(contextMsg, startMark, msg, reader_.mark);
|
2014-07-26 00:49:52 +00:00
|
|
|
return;
|
2014-07-25 15:33:54 +00:00
|
|
|
}
|
2014-07-25 00:34:53 +00:00
|
|
|
|
|
|
|
uint digit;
|
|
|
|
if(c - '0' < 10) { digit = c - '0'; }
|
|
|
|
else if(c - 'A' < 6) { digit = c - 'A'; }
|
|
|
|
else if(c - 'a' < 6) { digit = c - 'a'; }
|
|
|
|
else { assert(false); }
|
|
|
|
b += mult * digit;
|
|
|
|
mult /= 16;
|
|
|
|
}
|
|
|
|
bytes[bytesUsed++] = b;
|
|
|
|
|
|
|
|
reader_.forward(2);
|
|
|
|
}
|
|
|
|
|
2014-07-26 00:49:52 +00:00
|
|
|
bytesUsed = getDchar(bytes[0 .. bytesUsed], reader_);
|
2011-08-16 12:53:13 +00:00
|
|
|
}
|
2012-08-30 13:38:51 +00:00
|
|
|
catch(UTFException e)
|
2011-08-16 12:53:13 +00:00
|
|
|
{
|
2014-07-25 15:33:54 +00:00
|
|
|
setError(contextMsg, startMark, e.msg, mark);
|
2014-07-26 00:49:52 +00:00
|
|
|
return;
|
2014-07-25 15:33:54 +00:00
|
|
|
}
|
|
|
|
catch(Exception e)
|
|
|
|
{
|
|
|
|
assert(false, "Unexpected exception in scanURIEscapesToSlice");
|
2011-08-16 12:53:13 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2014-07-23 00:17:19 +00:00
|
|
|
/// Scan a line break, if any.
|
|
|
|
///
|
|
|
|
/// Transforms:
|
|
|
|
/// '\r\n' : '\n'
|
|
|
|
/// '\r' : '\n'
|
|
|
|
/// '\n' : '\n'
|
|
|
|
/// '\u0085' : '\n'
|
|
|
|
/// '\u2028' : '\u2028'
|
|
|
|
/// '\u2029 : '\u2029'
|
|
|
|
/// no break : '\0'
|
2014-07-23 00:47:46 +00:00
|
|
|
dchar scanLineBreak() @safe pure nothrow @nogc
|
2011-08-16 12:53:13 +00:00
|
|
|
{
|
|
|
|
const c = reader_.peek();
|
|
|
|
|
2011-10-26 04:30:10 +00:00
|
|
|
if(c == '\n' || c == '\r' || c == '\u0085')
|
2011-08-16 12:53:13 +00:00
|
|
|
{
|
2014-07-23 00:17:19 +00:00
|
|
|
if(reader_.prefix(2) == "\r\n"d) { reader_.forward(2); }
|
|
|
|
else { reader_.forward(); }
|
2011-08-16 12:53:13 +00:00
|
|
|
return '\n';
|
|
|
|
}
|
2011-10-26 04:30:10 +00:00
|
|
|
if(c == '\u2028' || c == '\u2029')
|
2011-08-16 12:53:13 +00:00
|
|
|
{
|
|
|
|
reader_.forward();
|
|
|
|
return c;
|
|
|
|
}
|
|
|
|
return '\0';
|
|
|
|
}
|
|
|
|
}
|
2014-07-23 16:18:21 +00:00
|
|
|
|
|
|
|
private:
|
|
|
|
|
2014-07-24 01:06:24 +00:00
|
|
|
/// A nothrow function that converts a dchar[] to a string.
string utf32To8(C)(C[] str) @safe pure nothrow
    if(is(Unqual!C == dchar))
{
    try
    {
        // std.conv.to performs the UTF-32 -> UTF-8 transcoding.
        return to!string(str);
    }
    catch(ConvException e) { assert(false, "Unexpected invalid UTF-32 string"); }
    catch(Exception e)     { assert(false, "Unexpected exception during UTF-8 encoding"); }
}
|
|
|
|
|