// Copyright Ferdinand Majerech 2011-2014.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
/// YAML scanner.
/// Code based on PyYAML: http://www.pyyaml.org
module dyaml.scanner;
import core.stdc.string;
import std.algorithm;
import std.array;
import std.conv;
import std.ascii : isAlphaNum, isDigit, isHexDigit;
import std.exception;
import std.string;
import std.typecons;
import std.traits : Unqual;
import std.utf;
import dyaml.escapes;
import dyaml.exception;
import dyaml.queue;
import dyaml.reader;
import dyaml.style;
import dyaml.token;
package:
/// Scanner produces tokens of the following types:
/// STREAM-START
/// STREAM-END
/// DIRECTIVE(name, value)
/// DOCUMENT-START
/// DOCUMENT-END
/// BLOCK-SEQUENCE-START
/// BLOCK-MAPPING-START
/// BLOCK-END
/// FLOW-SEQUENCE-START
/// FLOW-MAPPING-START
/// FLOW-SEQUENCE-END
/// FLOW-MAPPING-END
/// BLOCK-ENTRY
/// FLOW-ENTRY
/// KEY
/// VALUE
/// ALIAS(value)
/// ANCHOR(value)
/// TAG(value)
/// SCALAR(value, plain, style)
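///
/// For example (a sketch, not verbatim scanner output), the document:
///   ---
///   name: D-YAML
///   ...
/// is tokenized roughly as:
///   STREAM-START, DOCUMENT-START, BLOCK-MAPPING-START, KEY, SCALAR("name"),
///   VALUE, SCALAR("D-YAML"), BLOCK-END, DOCUMENT-END, STREAM-END.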
alias isBreak = among!('\0', '\n', '\r', '\u0085', '\u2028', '\u2029');
alias isBreakOrSpace = among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029');
alias isWhiteSpace = among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029');
alias isNonScalarStartCharacter = among!('-', '?', ':', ',', '[', ']', '{', '}',
'#', '&', '*', '!', '|', '>', '\'', '"', '%', '@', '`', ' ', '\t', '\0', '\n',
'\r', '\u0085', '\u2028', '\u2029');
alias isURIChar = among!('-', ';', '/', '?', ':', '@', '&', '=', '+', '$', ',',
'_', '.', '!', '~', '*', '\'', '(', ')', '[', ']', '%');
alias isNSChar = among!(' ', '\n', '\r', '\u0085', '\u2028', '\u2029');
alias isBChar = among!('\n', '\r', '\u0085', '\u2028', '\u2029');
alias isFlowScalarBreakSpace = among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029', '\'', '"', '\\');
/// Marked exception thrown at scanner errors.
///
/// See_Also: MarkedYAMLException
class ScannerException : MarkedYAMLException
{
mixin MarkedExceptionCtors;
}
/// Generates tokens from data provided by a Reader.
final class Scanner
{
private:
/// A simple key is a key that is not denoted by the '?' indicator.
/// For example:
/// ---
/// block simple key: value
/// ? not a simple key:
/// : { flow simple key: value }
/// We emit the KEY token before all keys, so when we find a potential simple
/// key, we try to locate the corresponding ':' indicator. Simple keys should be
/// limited to a single line and 1024 characters.
///
/// 16 bytes on 64-bit.
static struct SimpleKey
{
/// Character index in reader where the key starts.
uint charIndex = uint.max;
/// Index of the key token from start (first token scanned being 0).
uint tokenIndex;
/// Line the key starts at.
uint line;
/// Column the key starts at.
ushort column;
/// Is this required to be a simple key?
bool required;
/// Is this struct "null" (invalid)?
bool isNull;
}
/// Block chomping types.
enum Chomping
{
/// Strip all trailing line breaks. '-' indicator.
strip,
/// Line break of the last line is preserved, others discarded. Default.
clip,
/// All trailing line breaks are preserved. '+' indicator.
keep
}
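// For example (a sketch), given a literal block scalar whose raw content ends
// in "text\n\n":
//   strip ("|-"): the scalar value ends in "text"
//   clip  ("|" ): the scalar value ends in "text\n"
//   keep  ("|+"): the scalar value ends in "text\n\n"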
/// Reader used to read from a file/stream.
Reader reader_;
/// Are we done scanning?
bool done_;
/// Level of nesting in flow context. If 0, we're in block context.
uint flowLevel_;
/// Current indentation level.
int indent_ = -1;
/// Past indentation levels. Used as a stack.
Appender!(int[]) indents_;
/// Processed tokens not yet emitted. Used as a queue.
Queue!Token tokens_;
/// Number of tokens emitted through the getToken method.
uint tokensTaken_;
/// Can a simple key start at the current position? A simple key may start:
/// - at the beginning of the line, not counting indentation spaces
/// (in block context),
/// - after '{', '[', ',' (in the flow context),
/// - after '?', ':', '-' (in the block context).
/// In the block context, this flag also signifies if a block collection
/// may start at the current position.
bool allowSimpleKey_ = true;
/// Possible simple keys indexed by flow levels.
SimpleKey[] possibleSimpleKeys_;
/// Set on error by nothrow/@nogc inner functions along with errorData_.
///
/// Non-nothrow/GC-using caller functions can then throw an exception using
/// data stored in errorData_.
bool error_;
/// Data for the exception to throw if error_ is true.
MarkedYAMLExceptionData errorData_;
/// Error messages can be built in this buffer without using the GC.
///
/// ScannerException (MarkedYAMLException) copies string data passed to its
/// constructor so it's safe to use slices of this buffer as parameters for
/// exceptions that may outlive the Scanner. The GC allocation when creating the
/// error message is removed, but the allocation when creating an exception is
/// not.
char[256] msgBuffer_;
public:
/// Construct a Scanner using specified Reader.
this(Reader reader) @safe nothrow
{
reader_ = reader;
fetchStreamStart();
}
/// Check if the next token is one of specified types.
///
/// If no types are specified, checks if any tokens are left.
///
/// Params: ids = Token IDs to check for.
///
/// Returns: true if the next token is one of specified types, or if there are
/// any tokens left if no types specified, false otherwise.
bool checkToken(const TokenID[] ids ...) @safe
{
// Check if the next token is one of specified types.
while(needMoreTokens()) { fetchToken(); }
if(!tokens_.empty)
{
if(ids.length == 0) { return true; }
else
{
const nextId = tokens_.peek().id;
foreach(id; ids)
{
if(nextId == id) { return true; }
}
}
}
return false;
}
/// Return the next token, but keep it in the queue.
///
/// Must not be called if there are no tokens left.
ref const(Token) peekToken() @safe
{
while(needMoreTokens) { fetchToken(); }
if(!tokens_.empty) { return tokens_.peek(); }
assert(false, "No token left to peek");
}
/// Return the next token, removing it from the queue.
///
/// Must not be called if there are no tokens left.
Token getToken() @safe
{
while(needMoreTokens) { fetchToken(); }
if(!tokens_.empty)
{
++tokensTaken_;
return tokens_.pop();
}
assert(false, "No token left to get");
}
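// A minimal usage sketch of the public API (hypothetical caller code; the
// real consumer of these methods is the parser):
//
//   auto scanner = new Scanner(reader);
//   while(scanner.checkToken())              // any tokens left?
//   {
//       if(scanner.checkToken(TokenID.scalar))
//       {
//           auto token = scanner.getToken(); // consume the SCALAR
//           // ... use token.value ...
//       }
//       else { scanner.getToken(); }         // skip other tokens
//   }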
private:
/// Build an error message in msgBuffer_ and return it as a string.
string buildMsg(S ...)(S args)
{
try {
return text(args);
}
catch (Exception)
{
return "";
}
}
/// Most scanning error messages have the same format; so build them with this
/// function.
string expected(T)(string expected, T found)
{
return buildMsg("expected ", expected, ", but found ", found);
}
/// If error_ is true, throws a ScannerException constructed from errorData_ and
/// sets error_ to false.
void throwIfError() @safe pure
{
if(!error_) { return; }
error_ = false;
throw new ScannerException(errorData_);
}
/// Called by internal nothrow/@nogc methods to set an error to be thrown by
/// their callers.
///
/// See_Also: dyaml.exception.MarkedYamlException
void error(string context, const Mark contextMark, string problem,
const Mark problemMark) @safe pure nothrow @nogc
{
assert(error_ == false,
"Setting an error when there already is a not yet thrown error");
error_ = true;
errorData_ = MarkedYAMLExceptionData(context, contextMark, problem, problemMark);
}
/// Determine whether or not we need to fetch more tokens before peeking/getting a token.
bool needMoreTokens() @safe pure
{
if(done_) { return false; }
if(tokens_.empty) { return true; }
// The current token may be a potential simple key, so we need to look further.
stalePossibleSimpleKeys();
return nextPossibleSimpleKey() == tokensTaken_;
}
/// Fetch a token, adding it to tokens_.
void fetchToken() @safe
{
// Eat whitespaces and comments until we reach the next token.
scanToNextToken();
// Remove obsolete possible simple keys.
stalePossibleSimpleKeys();
// Compare current indentation and column. It may add some tokens
// and decrease the current indentation level.
unwindIndent(reader_.column);
// Get the next character.
const dchar c = reader_.peekByte();
// Fetch the token.
if(c == '\0') { return fetchStreamEnd(); }
if(checkDirective()) { return fetchDirective(); }
if(checkDocumentStart()) { return fetchDocumentStart(); }
if(checkDocumentEnd()) { return fetchDocumentEnd(); }
// Order of the following checks is NOT significant.
switch(c)
{
case '[': return fetchFlowSequenceStart();
case '{': return fetchFlowMappingStart();
case ']': return fetchFlowSequenceEnd();
case '}': return fetchFlowMappingEnd();
case ',': return fetchFlowEntry();
case '!': return fetchTag();
case '\'': return fetchSingle();
case '\"': return fetchDouble();
case '*': return fetchAlias();
case '&': return fetchAnchor();
case '?': if(checkKey()) { return fetchKey(); } goto default;
case ':': if(checkValue()) { return fetchValue(); } goto default;
case '-': if(checkBlockEntry()) { return fetchBlockEntry(); } goto default;
case '|': if(flowLevel_ == 0) { return fetchLiteral(); } break;
case '>': if(flowLevel_ == 0) { return fetchFolded(); } break;
default: if(checkPlain()) { return fetchPlain(); }
}
throw new ScannerException("While scanning for the next token, found character " ~
"\'%s\', index %s that cannot start any token"
.format(c, to!int(c)), reader_.mark);
}
/// Return the token number of the nearest possible simple key.
uint nextPossibleSimpleKey() @safe pure nothrow @nogc
{
uint minTokenNumber = uint.max;
foreach(ref simpleKey; possibleSimpleKeys_)
{
if(simpleKey.isNull) { continue; }
minTokenNumber = min(minTokenNumber, simpleKey.tokenIndex);
}
return minTokenNumber;
}
/// Remove entries that are no longer possible simple keys.
///
/// According to the YAML specification, simple keys
/// - should be limited to a single line,
/// - should be no longer than 1024 characters.
/// Relaxing these rules would allow simple keys of any length and
/// height (but may cause problems if indentation is broken).
void stalePossibleSimpleKeys() @safe pure
{
foreach(level, ref key; possibleSimpleKeys_)
{
if(key.isNull) { continue; }
if(key.line != reader_.line || reader_.charIndex - key.charIndex > 1024)
{
enforce(!key.required,
new ScannerException("While scanning a simple key",
Mark(key.line, key.column),
"could not find expected ':'", reader_.mark));
key.isNull = true;
}
}
}
/// Check if the next token starts a possible simple key and if so, save its position.
///
/// This function is called for ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.
void savePossibleSimpleKey() @safe pure
{
// Check if a simple key is required at the current position.
const required = (flowLevel_ == 0 && indent_ == reader_.column);
assert(allowSimpleKey_ || !required, "A simple key is required only if it is " ~
"the first token in the current line. Therefore it is always allowed.");
if(!allowSimpleKey_) { return; }
// The next token might be a simple key, so save its number and position.
removePossibleSimpleKey();
const tokenCount = tokensTaken_ + cast(uint)tokens_.length;
const line = reader_.line;
const column = reader_.column;
const key = SimpleKey(cast(uint)reader_.charIndex, tokenCount, line,
cast(ushort)min(column, ushort.max), required);
if(possibleSimpleKeys_.length <= flowLevel_)
{
const oldLength = possibleSimpleKeys_.length;
possibleSimpleKeys_.length = flowLevel_ + 1;
//No need to initialize the last element, it's already done in the next line.
possibleSimpleKeys_[oldLength .. flowLevel_] = SimpleKey.init;
}
possibleSimpleKeys_[flowLevel_] = key;
}
/// Remove the saved possible key position at the current flow level.
void removePossibleSimpleKey() @safe pure
{
if(possibleSimpleKeys_.length <= flowLevel_) { return; }
if(!possibleSimpleKeys_[flowLevel_].isNull)
{
const key = possibleSimpleKeys_[flowLevel_];
enforce(!key.required,
new ScannerException("While scanning a simple key",
Mark(key.line, key.column),
"could not find expected ':'", reader_.mark));
possibleSimpleKeys_[flowLevel_].isNull = true;
}
}
/// Decrease indentation, removing entries in indents_.
///
/// Params: column = Current column in the file/stream.
void unwindIndent(const int column) @safe
{
if(flowLevel_ > 0)
{
// In flow context, tokens should respect indentation.
// The condition should be `indent >= column` according to the spec.
// But this condition will prohibit intuitively correct
// constructions such as
// key : {
// }
// In the flow context, indentation is ignored. We make the scanner less
// restrictive than what the specification requires.
// if(pedantic_ && flowLevel_ > 0 && indent_ > column)
// {
// throw new ScannerException("Invalid indentation or unclosed '[' or '{'",
// reader_.mark)
// }
return;
}
// In block context, we may need to issue the BLOCK-END tokens.
while(indent_ > column)
{
assert(indents_.data.length);
indent_ = indents_.data.back;
indents_.shrinkTo(indents_.data.length - 1);
tokens_.push(blockEndToken(reader_.mark, reader_.mark));
}
}
/// Increase indentation if needed.
///
/// Params: column = Current column in the file/stream.
///
/// Returns: true if the indentation was increased, false otherwise.
bool addIndent(int column) @safe
{
if(indent_ >= column) { return false; }
indents_ ~= indent_;
indent_ = column;
return true;
}
/// Add STREAM-START token.
void fetchStreamStart() @safe nothrow
{
tokens_.push(streamStartToken(reader_.mark, reader_.mark, reader_.encoding));
}
/// Add STREAM-END token.
void fetchStreamEnd() @safe
{
// Set indentation to -1.
unwindIndent(-1);
removePossibleSimpleKey();
allowSimpleKey_ = false;
possibleSimpleKeys_.destroy;
tokens_.push(streamEndToken(reader_.mark, reader_.mark));
done_ = true;
}
/// Add DIRECTIVE token.
void fetchDirective() @safe
{
// Set indentation to -1.
unwindIndent(-1);
// Reset simple keys.
removePossibleSimpleKey();
allowSimpleKey_ = false;
auto directive = scanDirective();
throwIfError();
tokens_.push(directive);
}
/// Add DOCUMENT-START or DOCUMENT-END token.
void fetchDocumentIndicator(TokenID id)()
if(id == TokenID.documentStart || id == TokenID.documentEnd)
{
// Set indentation to -1.
unwindIndent(-1);
// Reset simple keys. Note that there can't be a block collection after '---'.
removePossibleSimpleKey();
allowSimpleKey_ = false;
Mark startMark = reader_.mark;
reader_.forward(3);
tokens_.push(simpleToken!id(startMark, reader_.mark));
}
/// Aliases to add DOCUMENT-START or DOCUMENT-END token.
alias fetchDocumentStart = fetchDocumentIndicator!(TokenID.documentStart);
alias fetchDocumentEnd = fetchDocumentIndicator!(TokenID.documentEnd);
/// Add FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
void fetchFlowCollectionStart(TokenID id)() @safe
{
// '[' and '{' may start a simple key.
savePossibleSimpleKey();
// Simple keys are allowed after '[' and '{'.
allowSimpleKey_ = true;
++flowLevel_;
Mark startMark = reader_.mark;
reader_.forward();
tokens_.push(simpleToken!id(startMark, reader_.mark));
}
/// Aliases to add FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
alias fetchFlowSequenceStart = fetchFlowCollectionStart!(TokenID.flowSequenceStart);
alias fetchFlowMappingStart = fetchFlowCollectionStart!(TokenID.flowMappingStart);
/// Add FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
void fetchFlowCollectionEnd(TokenID id)()
{
// Reset possible simple key on the current level.
removePossibleSimpleKey();
// No simple keys after ']' and '}'.
allowSimpleKey_ = false;
--flowLevel_;
Mark startMark = reader_.mark;
reader_.forward();
tokens_.push(simpleToken!id(startMark, reader_.mark));
}
/// Aliases to add FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
alias fetchFlowSequenceEnd = fetchFlowCollectionEnd!(TokenID.flowSequenceEnd);
alias fetchFlowMappingEnd = fetchFlowCollectionEnd!(TokenID.flowMappingEnd);
/// Add FLOW-ENTRY token.
void fetchFlowEntry() @safe
{
// Reset possible simple key on the current level.
removePossibleSimpleKey();
// Simple keys are allowed after ','.
allowSimpleKey_ = true;
Mark startMark = reader_.mark;
reader_.forward();
tokens_.push(flowEntryToken(startMark, reader_.mark));
}
/// Additional checks used in block context in fetchBlockEntry and fetchKey.
///
/// Params: type = String representing the token type we might need to add.
/// id = Token type we might need to add.
void blockChecks(string type, TokenID id)()
{
enum context = type ~ " keys are not allowed here";
// Are we allowed to start a key (not necessarily a simple one)?
enforce(allowSimpleKey_, new ScannerException(context, reader_.mark));
if(addIndent(reader_.column))
{
tokens_.push(simpleToken!id(reader_.mark, reader_.mark));
}
}
/// Add BLOCK-ENTRY token. Might add BLOCK-SEQUENCE-START in the process.
void fetchBlockEntry() @safe
{
if(flowLevel_ == 0) { blockChecks!("Sequence", TokenID.blockSequenceStart)(); }
// It's an error for the block entry to occur in the flow context,
// but we let the parser detect this.
// Reset possible simple key on the current level.
removePossibleSimpleKey();
// Simple keys are allowed after '-'.
allowSimpleKey_ = true;
Mark startMark = reader_.mark;
reader_.forward();
tokens_.push(blockEntryToken(startMark, reader_.mark));
}
/// Add KEY token. Might add BLOCK-MAPPING-START in the process.
void fetchKey() @safe
{
if(flowLevel_ == 0) { blockChecks!("Mapping", TokenID.blockMappingStart)(); }
// Reset possible simple key on the current level.
removePossibleSimpleKey();
// Simple keys are allowed after '?' in the block context.
allowSimpleKey_ = (flowLevel_ == 0);
Mark startMark = reader_.mark;
reader_.forward();
tokens_.push(keyToken(startMark, reader_.mark));
}
/// Add VALUE token. Might add KEY and/or BLOCK-MAPPING-START in the process.
void fetchValue() @safe
{
// Do we determine a simple key?
if(possibleSimpleKeys_.length > flowLevel_ &&
!possibleSimpleKeys_[flowLevel_].isNull)
{
const key = possibleSimpleKeys_[flowLevel_];
possibleSimpleKeys_[flowLevel_].isNull = true;
Mark keyMark = Mark(key.line, key.column);
const idx = key.tokenIndex - tokensTaken_;
assert(idx >= 0);
// Add KEY.
// Manually inserting since tokens are immutable (need linked list).
tokens_.insert(keyToken(keyMark, keyMark), idx);
// If this key starts a new block mapping, we need to add BLOCK-MAPPING-START.
if(flowLevel_ == 0 && addIndent(key.column))
{
tokens_.insert(blockMappingStartToken(keyMark, keyMark), idx);
}
// There cannot be two simple keys in a row.
allowSimpleKey_ = false;
}
// Part of a complex key
else
{
// We can start a complex value if and only if we can start a simple key.
enforce(flowLevel_ > 0 || allowSimpleKey_,
new ScannerException("Mapping values are not allowed here", reader_.mark));
// If this value starts a new block mapping, we need to add
// BLOCK-MAPPING-START. It'll be detected as an error later by the parser.
if(flowLevel_ == 0 && addIndent(reader_.column))
{
tokens_.push(blockMappingStartToken(reader_.mark, reader_.mark));
}
// Reset possible simple key on the current level.
removePossibleSimpleKey();
// Simple keys are allowed after ':' in the block context.
allowSimpleKey_ = (flowLevel_ == 0);
}
// Add VALUE.
Mark startMark = reader_.mark;
reader_.forward();
tokens_.push(valueToken(startMark, reader_.mark));
}
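// For example (a sketch), when scanning the line:
//   a: 1
// the plain scalar 'a' is first saved as a possible simple key and pushed as
// SCALAR("a"); when the ':' is reached, fetchValue() inserts KEY (and
// BLOCK-MAPPING-START, since a new block mapping starts here) before the
// already-queued SCALAR("a"), then pushes VALUE.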
/// Add ALIAS or ANCHOR token.
void fetchAnchor_(TokenID id)() @safe
if(id == TokenID.alias_ || id == TokenID.anchor)
{
// ALIAS/ANCHOR could be a simple key.
savePossibleSimpleKey();
// No simple keys after ALIAS/ANCHOR.
allowSimpleKey_ = false;
auto anchor = scanAnchor(id);
throwIfError();
tokens_.push(anchor);
}
/// Aliases to add ALIAS or ANCHOR token.
alias fetchAlias = fetchAnchor_!(TokenID.alias_);
alias fetchAnchor = fetchAnchor_!(TokenID.anchor);
/// Add TAG token.
void fetchTag() @safe
{
// TAG could start a simple key.
savePossibleSimpleKey();
// No simple keys after TAG.
allowSimpleKey_ = false;
tokens_.push(scanTag());
throwIfError();
}
/// Add block SCALAR token.
void fetchBlockScalar(ScalarStyle style)() @safe
if(style == ScalarStyle.literal || style == ScalarStyle.folded)
{
// Reset possible simple key on the current level.
removePossibleSimpleKey();
// A simple key may follow a block scalar.
allowSimpleKey_ = true;
auto blockScalar = scanBlockScalar(style);
throwIfError();
tokens_.push(blockScalar);
}
/// Aliases to add literal or folded block scalar.
alias fetchLiteral = fetchBlockScalar!(ScalarStyle.literal);
alias fetchFolded = fetchBlockScalar!(ScalarStyle.folded);
/// Add quoted flow SCALAR token.
void fetchFlowScalar(ScalarStyle quotes)()
{
// A flow scalar could be a simple key.
savePossibleSimpleKey();
// No simple keys after flow scalars.
allowSimpleKey_ = false;
// Scan and add SCALAR.
auto scalar = scanFlowScalar(quotes);
throwIfError();
tokens_.push(scalar);
}
/// Aliases to add single or double quoted flow scalar.
alias fetchSingle = fetchFlowScalar!(ScalarStyle.singleQuoted);
alias fetchDouble = fetchFlowScalar!(ScalarStyle.doubleQuoted);
/// Add plain SCALAR token.
void fetchPlain() @safe
{
// A plain scalar could be a simple key
savePossibleSimpleKey();
// No simple keys after plain scalars. But note that scanPlain() will
// change this flag if the scan is finished at the beginning of the line.
allowSimpleKey_ = false;
auto plain = scanPlain();
throwIfError();
// Add the scanned SCALAR token.
tokens_.push(plain);
}
pure:
/// Check if the next token is DIRECTIVE: ^ '%' ...
bool checkDirective() @safe
{
return reader_.peekByte() == '%' && reader_.column == 0;
}
/// Check if the next token is DOCUMENT-START: ^ '---' (' '|'\n')
bool checkDocumentStart() @safe
{
// Check one char first, then all 3, to prevent reading outside the buffer.
return reader_.column == 0 &&
reader_.peekByte() == '-' &&
reader_.prefix(3) == "---" &&
reader_.peek(3).isWhiteSpace;
}
/// Check if the next token is DOCUMENT-END: ^ '...' (' '|'\n')
bool checkDocumentEnd() @safe
{
// Check one char first, then all 3, to prevent reading outside the buffer.
return reader_.column == 0 &&
reader_.peekByte() == '.' &&
reader_.prefix(3) == "..." &&
reader_.peek(3).isWhiteSpace;
}
/// Check if the next token is BLOCK-ENTRY: '-' (' '|'\n')
bool checkBlockEntry() @safe
{
return !!reader_.peek(1).isWhiteSpace;
}
/// Check if the next token is KEY(flow context): '?'
///
/// or KEY(block context): '?' (' '|'\n')
bool checkKey() @safe
{
return (flowLevel_ > 0 || reader_.peek(1).isWhiteSpace);
}
/// Check if the next token is VALUE(flow context): ':'
///
/// or VALUE(block context): ':' (' '|'\n')
bool checkValue() @safe
{
return flowLevel_ > 0 || reader_.peek(1).isWhiteSpace;
}
/// Check if the next token is a plain scalar.
///
/// A plain scalar may start with any non-space character except:
/// '-', '?', ':', ',', '[', ']', '{', '}',
/// '#', '&', '*', '!', '|', '>', '\'', '\"',
/// '%', '@', '`'.
///
/// It may also start with
/// '-', '?', ':'
/// if it is followed by a non-space character.
///
/// Note that we limit the last rule to the block context (except the
/// '-' character) because we want the flow context to be space
/// independent.
bool checkPlain() @safe
{
const c = reader_.peek();
if(!c.isNonScalarStartCharacter)
{
return true;
}
return !reader_.peek(1).isWhiteSpace &&
(c == '-' || (flowLevel_ == 0 && (c == '?' || c == ':')));
}
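// For example (a sketch): "-foo" starts a plain scalar because the '-' is
// followed by a non-space character, while "- foo" starts a BLOCK-ENTRY.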
/// Move to the next non-space character.
void findNextNonSpace() @safe
{
while(reader_.peekByte() == ' ') { reader_.forward(); }
}
/// Scan a string of alphanumeric or "-_" characters.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
void scanAlphaNumericToSlice(string name)(const Mark startMark)
{
size_t length;
dchar c = reader_.peek();
while(c.isAlphaNum || "-_"d.canFind(c)) { c = reader_.peek(++length); }
if(length == 0)
{
enum contextMsg = "While scanning " ~ name;
error(contextMsg, startMark, expected("alphanumeric, '-' or '_'", c),
reader_.mark);
return;
}
reader_.sliceBuilder.write(reader_.get(length));
}
/// Scan and throw away all characters until next line break.
void scanToNextBreak() @safe
{
while(!reader_.peek().isBreak) { reader_.forward(); }
}
/// Scan all characters until next line break.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
void scanToNextBreakToSlice() @safe
{
uint length;
while(!reader_.peek(length).isBreak)
{
++length;
}
reader_.sliceBuilder.write(reader_.get(length));
}
/// Move to next token in the file/stream.
///
/// We ignore spaces, line breaks and comments.
/// If we find a line break in the block context, we set
/// allowSimpleKey_ on.
///
/// We do not yet support BOM inside the stream as the
/// specification requires. Any such mark will be considered as a part
/// of the document.
void scanToNextToken() @safe
{
// TODO(PyYAML): We need to make tab handling rules more sane. A good rule is:
// Tabs cannot precede tokens
// BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END,
// KEY(block), VALUE(block), BLOCK-ENTRY
// So the checking code is
// if <TAB>:
// allowSimpleKey_ = false
// We also need to add the check for `allowSimpleKey_ == true` to
// `unwindIndent` before issuing BLOCK-END.
// Scanners for block, flow, and plain scalars need to be modified.
for(;;)
{
findNextNonSpace();
if(reader_.peekByte() == '#') { scanToNextBreak(); }
if(scanLineBreak() != '\0')
{
if(flowLevel_ == 0) { allowSimpleKey_ = true; }
}
else
{
break;
}
}
}
/// Scan directive token.
Token scanDirective() @safe
{
Mark startMark = reader_.mark;
// Skip the '%'.
reader_.forward();
// Scan directive name
reader_.sliceBuilder.begin();
scanDirectiveNameToSlice(startMark);
if(error_) { return Token.init; }
const name = reader_.sliceBuilder.finish();
reader_.sliceBuilder.begin();
// Index where tag handle ends and suffix starts in a tag directive value.
uint tagHandleEnd = uint.max;
if(name == "YAML") { scanYAMLDirectiveValueToSlice(startMark); }
else if(name == "TAG") { tagHandleEnd = scanTagDirectiveValueToSlice(startMark); }
if(error_) { return Token.init; }
char[] value = reader_.sliceBuilder.finish();
Mark endMark = reader_.mark;
DirectiveType directive;
if(name == "YAML") { directive = DirectiveType.yaml; }
else if(name == "TAG") { directive = DirectiveType.tag; }
else
{
directive = DirectiveType.reserved;
scanToNextBreak();
}
scanDirectiveIgnoredLine(startMark);
if(error_) { return Token.init; }
return directiveToken(startMark, endMark, value, directive, tagHandleEnd);
}
/// Scan name of a directive token.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
void scanDirectiveNameToSlice(const Mark startMark) @safe
{
// Scan directive name.
scanAlphaNumericToSlice!"a directive"(startMark);
if(error_) { return; }
if(" \0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek())) { return; }
error("While scanning a directive", startMark,
expected("alphanumeric, '-' or '_'", reader_.peek()), reader_.mark);
}
/// Scan value of a YAML directive token. Returns major, minor version separated by '.'.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
void scanYAMLDirectiveValueToSlice(const Mark startMark) @safe
{
findNextNonSpace();
scanYAMLDirectiveNumberToSlice(startMark);
if(error_) { return; }
if(reader_.peekByte() != '.')
{
error("While scanning a directive", startMark,
expected("digit or '.'", reader_.peek()), reader_.mark);
return;
}
// Skip the '.'.
reader_.forward();
reader_.sliceBuilder.write('.');
scanYAMLDirectiveNumberToSlice(startMark);
if(error_) { return; }
if(!" \0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek()))
{
error("While scanning a directive", startMark,
expected("digit or '.'", reader_.peek()), reader_.mark);
}
}
/// Scan a number from a YAML directive.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
void scanYAMLDirectiveNumberToSlice(const Mark startMark) @safe
{
if(!isDigit(reader_.peek()))
{
error("While scanning a directive", startMark,
expected("digit", reader_.peek()), reader_.mark);
return;
}
// Already found the first digit in the check above, so start with length 1.
uint length = 1;
while(reader_.peek(length).isDigit) { ++length; }
reader_.sliceBuilder.write(reader_.get(length));
}
/// Scan value of a tag directive.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
///
/// Returns: Length of tag handle (which is before tag prefix) in scanned data
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
uint scanTagDirectiveValueToSlice(const Mark startMark) @safe
{
findNextNonSpace();
const startLength = reader_.sliceBuilder.length;
scanTagDirectiveHandleToSlice(startMark);
if(error_) { return uint.max; }
const handleLength = cast(uint)(reader_.sliceBuilder.length - startLength);
findNextNonSpace();
scanTagDirectivePrefixToSlice(startMark);
return handleLength;
}
/// Scan handle of a tag directive.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
void scanTagDirectiveHandleToSlice(const Mark startMark) @safe
{
scanTagHandleToSlice!"directive"(startMark);
if(error_) { return; }
if(reader_.peekByte() == ' ') { return; }
error("While scanning a directive handle", startMark,
expected("' '", reader_.peek()), reader_.mark);
}
/// Scan prefix of a tag directive.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
void scanTagDirectivePrefixToSlice(const Mark startMark) @safe
{
scanTagURIToSlice!"directive"(startMark);
if(" \0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek())) { return; }
error("While scanning a directive prefix", startMark,
expected("' '", reader_.peek()), reader_.mark);
}
/// Scan (and ignore) ignored line after a directive.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
void scanDirectiveIgnoredLine(const Mark startMark) @safe
{
findNextNonSpace();
if(reader_.peekByte() == '#') { scanToNextBreak(); }
if(reader_.peek().isBreak)
{
scanLineBreak();
return;
}
error("While scanning a directive", startMark,
expected("comment or a line break", reader_.peek()), reader_.mark);
}
/// Scan an alias or an anchor.
///
/// The specification does not restrict characters for anchors and
/// aliases. This may lead to problems, for instance, the document:
/// [ *alias, value ]
/// can be interpreted in two ways, as
/// [ "value" ]
/// and
/// [ *alias , "value" ]
/// Therefore we restrict aliases to ASCII alphanumeric characters.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
Token scanAnchor(const TokenID id) @safe
{
const startMark = reader_.mark;
const dchar i = reader_.get();
reader_.sliceBuilder.begin();
if(i == '*') { scanAlphaNumericToSlice!"an alias"(startMark); }
else { scanAlphaNumericToSlice!"an anchor"(startMark); }
// On error, value is discarded as we return immediately
char[] value = reader_.sliceBuilder.finish();
if(error_) { return Token.init; }
if(!reader_.peek().isWhiteSpace &&
!"?:,]}%@"d.canFind(reader_.peekByte()))
{
enum anchorCtx = "While scanning an anchor";
enum aliasCtx = "While scanning an alias";
error(i == '*' ? aliasCtx : anchorCtx, startMark,
expected("alphanumeric, '-' or '_'", reader_.peek()), reader_.mark);
return Token.init;
}
if(id == TokenID.alias_)
{
return aliasToken(startMark, reader_.mark, value);
}
if(id == TokenID.anchor)
{
return anchorToken(startMark, reader_.mark, value);
}
assert(false, "This code should never be reached");
}
/// Scan a tag token.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
Token scanTag() @safe
{
const startMark = reader_.mark;
dchar c = reader_.peek(1);
reader_.sliceBuilder.begin();
scope(failure) { reader_.sliceBuilder.finish(); }
// Index where tag handle ends and tag suffix starts in the tag value
// (slice) we will produce.
uint handleEnd;
if(c == '<')
{
reader_.forward(2);
handleEnd = 0;
scanTagURIToSlice!"tag"(startMark);
if(error_) { return Token.init; }
if(reader_.peekByte() != '>')
{
error("While scanning a tag", startMark,
expected("'>'", reader_.peek()), reader_.mark);
return Token.init;
}
reader_.forward();
}
else if(c.isWhiteSpace)
{
reader_.forward();
handleEnd = 0;
reader_.sliceBuilder.write('!');
}
else
{
uint length = 1;
bool useHandle;
while(!c.isBreakOrSpace)
{
if(c == '!')
{
useHandle = true;
break;
}
++length;
c = reader_.peek(length);
}
if(useHandle)
{
scanTagHandleToSlice!"tag"(startMark);
handleEnd = cast(uint)reader_.sliceBuilder.length;
if(error_) { return Token.init; }
}
else
{
reader_.forward();
reader_.sliceBuilder.write('!');
handleEnd = cast(uint)reader_.sliceBuilder.length;
}
scanTagURIToSlice!"tag"(startMark);
if(error_) { return Token.init; }
}
if(reader_.peek().isBreakOrSpace)
{
char[] slice = reader_.sliceBuilder.finish();
return tagToken(startMark, reader_.mark, slice, handleEnd);
}
error("While scanning a tag", startMark, expected("' '", reader_.peek()),
reader_.mark);
return Token.init;
}
/// Scan a block scalar token with specified style.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
Token scanBlockScalar(const ScalarStyle style) @safe
{
const startMark = reader_.mark;
// Scan the header.
reader_.forward();
const indicators = scanBlockScalarIndicators(startMark);
if(error_) { return Token.init; }
const chomping = indicators[0];
const increment = indicators[1];
scanBlockScalarIgnoredLine(startMark);
if(error_) { return Token.init; }
// Determine the indentation level and go to the first non-empty line.
Mark endMark;
uint indent = max(1, indent_ + 1);
reader_.sliceBuilder.begin();
alias Transaction = SliceBuilder.Transaction;
// Used to strip the last line breaks written to the slice at the end of the
// scalar, which may be needed based on chomping.
Transaction breaksTransaction = Transaction(&reader_.sliceBuilder);
// Read the first indentation/line breaks before the scalar.
size_t startLen = reader_.sliceBuilder.length;
if(increment == int.min)
{
auto indentation = scanBlockScalarIndentationToSlice();
endMark = indentation[1];
indent = max(indent, indentation[0]);
}
else
{
indent += increment - 1;
endMark = scanBlockScalarBreaksToSlice(indent);
}
// int.max means there's no line break (int.max is outside UTF-32).
dchar lineBreak = cast(dchar)int.max;
// Scan the inner part of the block scalar.
while(reader_.column == indent && reader_.peekByte() != '\0')
{
breaksTransaction.commit();
const bool leadingNonSpace = !" \t"d.canFind(reader_.peekByte());
// This is where the 'interesting' non-whitespace data gets read.
scanToNextBreakToSlice();
lineBreak = scanLineBreak();
// This transaction serves to rollback data read in the
// scanBlockScalarBreaksToSlice() call.
breaksTransaction = Transaction(&reader_.sliceBuilder);
startLen = reader_.sliceBuilder.length;
// The line breaks should actually be written _after_ the if() block
// below. We work around that by remembering startLen and, if needed,
// inserting the line break into the middle of the slice later.
endMark = scanBlockScalarBreaksToSlice(indent);
// This will not run during the last iteration (see the if() vs the
// while()), hence breaksTransaction rollback (which happens after this
// loop) will never roll back data written in this if() block.
if(reader_.column == indent && reader_.peekByte() != '\0')
{
// Unfortunately, folding rules are ambiguous.
// This is the folding according to the specification:
if(style == ScalarStyle.folded && lineBreak == '\n' &&
leadingNonSpace && !" \t"d.canFind(reader_.peekByte()))
{
// No breaks were scanned; no need to insert the space in the
// middle of slice.
if(startLen == reader_.sliceBuilder.length)
{
reader_.sliceBuilder.write(' ');
}
}
else
{
// We need to insert in the middle of the slice in case any line
// breaks were scanned.
reader_.sliceBuilder.insert(lineBreak, startLen);
}
//// This is Clark Evans's interpretation (also in the spec
//// examples):
//
//if(style == ScalarStyle.folded && lineBreak == '\n')
//{
// if(startLen == endLen)
// {
// if(!" \t"d.canFind(reader_.peekByte()))
// {
// reader_.sliceBuilder.write(' ');
// }
// else
// {
// chunks ~= lineBreak;
// }
// }
//}
//else
//{
// reader_.sliceBuilder.insertBack(lineBreak, endLen - startLen);
//}
}
else
{
break;
}
}
// If chomping is Keep, we keep (commit) the last scanned line breaks
// (which are at the end of the scalar). Otherwise we remove them (end the
// transaction).
if(chomping == Chomping.keep) { breaksTransaction.commit(); }
else { breaksTransaction.end(); }
if(chomping != Chomping.strip && lineBreak != int.max)
{
// If chomping is Keep, we keep the line break but the first line break
// that isn't stripped (since chomping isn't Strip in this branch) must
// be inserted _before_ the other line breaks.
if(chomping == Chomping.keep)
{
reader_.sliceBuilder.insert(lineBreak, startLen);
}
// If chomping is not Keep, breaksTransaction was cancelled so we can
// directly write the first line break (as it isn't stripped - chomping
// is not Strip)
else
{
reader_.sliceBuilder.write(lineBreak);
}
}
char[] slice = reader_.sliceBuilder.finish();
return scalarToken(startMark, endMark, slice, style);
}
/// Scan chomping and indentation indicators of a scalar token.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
Tuple!(Chomping, int) scanBlockScalarIndicators(const Mark startMark) @safe
{
auto chomping = Chomping.clip;
int increment = int.min;
dchar c = reader_.peek();
// Indicators can be in any order.
if(getChomping(c, chomping))
{
getIncrement(c, increment, startMark);
if(error_) { return tuple(Chomping.init, int.max); }
}
else
{
const gotIncrement = getIncrement(c, increment, startMark);
if(error_) { return tuple(Chomping.init, int.max); }
if(gotIncrement) { getChomping(c, chomping); }
}
if(" \0\n\r\u0085\u2028\u2029"d.canFind(c))
{
return tuple(chomping, increment);
}
error("While scanning a block scalar", startMark,
expected("chomping or indentation indicator", c), reader_.mark);
return tuple(Chomping.init, int.max);
}
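// For example (a sketch of block scalar headers parsed here):
//   "|2-" : indentation indicator 2, strip chomping
//   ">+"  : keep chomping, indentation auto-detected
//   "|"   : clip chomping (the default), indentation auto-detected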
/// Get chomping indicator, if detected. Return false otherwise.
///
/// Used in scanBlockScalarIndicators.
///
/// Params:
///
/// c = The character that may be a chomping indicator.
/// chomping = Write the chomping value here, if detected.
bool getChomping(ref dchar c, ref Chomping chomping) @safe
{
if(!"+-"d.canFind(c)) { return false; }
chomping = c == '+' ? Chomping.keep : Chomping.strip;
reader_.forward();
c = reader_.peek();
return true;
}
/// Get increment indicator, if detected. Return false otherwise.
///
/// Used in scanBlockScalarIndicators.
///
/// Params:
///
/// c = The character that may be an increment indicator.
/// If an increment indicator is detected, this will be updated to
/// the next character in the Reader.
/// increment = Write the increment value here, if detected.
/// startMark = Mark for error messages.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
bool getIncrement(ref dchar c, ref int increment, const Mark startMark) @safe
{
if(!c.isDigit) { return false; }
// Convert a digit to integer.
increment = c - '0';
assert(increment < 10 && increment >= 0, "Digit has invalid value");
if(increment > 0)
{
reader_.forward();
c = reader_.peek();
return true;
}
error("While scanning a block scalar", startMark,
expected("indentation indicator in range 1-9", "0"), reader_.mark);
return false;
}
/// Scan (and ignore) ignored line in a block scalar.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
void scanBlockScalarIgnoredLine(const Mark startMark) @safe
{
findNextNonSpace();
if(reader_.peekByte() == '#') { scanToNextBreak(); }
if(reader_.peek().isBreak)
{
scanLineBreak();
return;
}
error("While scanning a block scalar", startMark,
expected("comment or line break", reader_.peek()), reader_.mark);
}
/// Scan indentation in a block scalar, returning line breaks, max indent and end mark.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
Tuple!(uint, Mark) scanBlockScalarIndentationToSlice() @safe
{
uint maxIndent;
Mark endMark = reader_.mark;
while(" \n\r\u0085\u2028\u2029"d.canFind(reader_.peek()))
{
if(reader_.peekByte() != ' ')
{
reader_.sliceBuilder.write(scanLineBreak());
endMark = reader_.mark;
continue;
}
reader_.forward();
maxIndent = max(reader_.column, maxIndent);
}
return tuple(maxIndent, endMark);
}
/// Scan line breaks at lower or specified indentation in a block scalar.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
Mark scanBlockScalarBreaksToSlice(const uint indent) @safe
{
Mark endMark = reader_.mark;
for(;;)
{
while(reader_.column < indent && reader_.peekByte() == ' ') { reader_.forward(); }
if(!"\n\r\u0085\u2028\u2029"d.canFind(reader_.peek())) { break; }
reader_.sliceBuilder.write(scanLineBreak());
endMark = reader_.mark;
}
return endMark;
}
/// Scan a quoted flow scalar token with specified quotes.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
Token scanFlowScalar(const ScalarStyle quotes) @safe
{
const startMark = reader_.mark;
const quote = reader_.get();
reader_.sliceBuilder.begin();
scope(exit) if(error_) { reader_.sliceBuilder.finish(); }
scanFlowScalarNonSpacesToSlice(quotes, startMark);
if(error_) { return Token.init; }
while(reader_.peek() != quote)
{
scanFlowScalarSpacesToSlice(startMark);
if(error_) { return Token.init; }
scanFlowScalarNonSpacesToSlice(quotes, startMark);
if(error_) { return Token.init; }
}
reader_.forward();
auto slice = reader_.sliceBuilder.finish();
return scalarToken(startMark, reader_.mark, slice, quotes);
}
/// Scan nonspace characters in a flow scalar.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
void scanFlowScalarNonSpacesToSlice(const ScalarStyle quotes, const Mark startMark)
@safe
{
for(;;)
{
dchar c = reader_.peek();
size_t numCodePoints;
// This is an optimized way of writing:
// while(!search.canFind(reader_.peek(numCodePoints))) { ++numCodePoints; }
outer: for(size_t oldSliceLength;;)
{
// This will not necessarily make slice 32 chars longer, as not all
// code points are 1 char.
const char[] slice = reader_.slice(numCodePoints + 32);
if(slice.length == oldSliceLength)
{
error("While reading a flow scalar", startMark,
"reached end of file", reader_.mark);
return;
}
for(size_t i = oldSliceLength; i < slice.length;)
{
// slice is UTF-8 - need to decode
const ch = slice[i] < 0x80 ? slice[i++] : decode(slice, i);
if(ch.isFlowScalarBreakSpace) { break outer; }
++numCodePoints;
}
oldSliceLength = slice.length;
}
reader_.sliceBuilder.write(reader_.get(numCodePoints));
c = reader_.peek();
if(quotes == ScalarStyle.singleQuoted && c == '\'' && reader_.peek(1) == '\'')
{
reader_.forward(2);
reader_.sliceBuilder.write('\'');
}
else if((quotes == ScalarStyle.doubleQuoted && c == '\'') ||
(quotes == ScalarStyle.singleQuoted && "\"\\"d.canFind(c)))
{
reader_.forward();
reader_.sliceBuilder.write(c);
}
else if(quotes == ScalarStyle.doubleQuoted && c == '\\')
{
reader_.forward();
c = reader_.peek();
if(dyaml.escapes.escapes.canFind(c))
{
reader_.forward();
// Escaping has been moved to Parser as it can't be done in
// place (in a slice) in case of '\P' and '\L' (very uncommon,
// but we don't want to break the spec)
char[2] escapeSequence = ['\\', cast(char)c];
reader_.sliceBuilder.write(escapeSequence);
}
else if(dyaml.escapes.escapeHexCodeList.canFind(c))
{
const hexLength = dyaml.escapes.escapeHexLength(c);
reader_.forward();
foreach(i; 0 .. hexLength) if(!reader_.peek(i).isHexDigit)
{
error("While scanning a double quoted scalar", startMark,
expected("escape sequence of hexadecimal numbers",
reader_.peek(i)), reader_.mark);
return;
}
char[] hex = reader_.get(hexLength);
char[2] escapeStart = ['\\', cast(char) c];
reader_.sliceBuilder.write(escapeStart);
reader_.sliceBuilder.write(hex);
// Note: This is just error checking; Parser does the actual
// escaping (otherwise we could accidentally create an
// escape sequence here that wasn't in input, breaking the
// escaping code in parser, which is in parser because it
// can't always be done in place)
try {
parse!int(hex, 16u);
}
catch (Exception)
{
error("While scanning a double quoted scalar", startMark,
"overflow when parsing an escape sequence of " ~
"hexadecimal numbers.", reader_.mark);
return;
}
}
else if("\n\r\u0085\u2028\u2029"d.canFind(c))
{
scanLineBreak();
scanFlowScalarBreaksToSlice(startMark);
if(error_) { return; }
}
else
{
error("While scanning a double quoted scalar", startMark,
buildMsg("found unsupported escape character", c),
reader_.mark);
return;
}
}
else { return; }
}
}
/// Scan space characters in a flow scalar.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// spaces into that slice.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
void scanFlowScalarSpacesToSlice(const Mark startMark) @safe
{
// Increase length as long as we see whitespace.
size_t length;
while(" \t"d.canFind(reader_.peekByte(length))) { ++length; }
auto whitespaces = reader_.prefixBytes(length);
// Can check the last byte without striding because '\0' is ASCII
const c = reader_.peek(length);
if(c == '\0')
{
error("While scanning a quoted scalar", startMark,
"found unexpected end of buffer", reader_.mark);
return;
}
// Spaces not followed by a line break.
if(!"\n\r\u0085\u2028\u2029"d.canFind(c))
{
reader_.forward(length);
reader_.sliceBuilder.write(whitespaces);
return;
}
// There's a line break after the spaces.
reader_.forward(length);
const lineBreak = scanLineBreak();
if(lineBreak != '\n') { reader_.sliceBuilder.write(lineBreak); }
// If we have extra line breaks after the first, scan them into the
// slice.
const bool extraBreaks = scanFlowScalarBreaksToSlice(startMark);
if(error_) { return; }
// No extra breaks, one normal line break. Replace it with a space.
if(lineBreak == '\n' && !extraBreaks) { reader_.sliceBuilder.write(' '); }
}
/// Scan line breaks in a flow scalar.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// line breaks into that slice.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
bool scanFlowScalarBreaksToSlice(const Mark startMark) @safe
{
// True if at least one line break was found.
bool anyBreaks;
for(;;)
{
// Instead of checking indentation, we check for document separators.
const prefix = reader_.prefix(3);
if((prefix == "---" || prefix == "...") &&
reader_.peek(3).isWhiteSpace)
{
error("While scanning a quoted scalar", startMark,
"found unexpected document separator", reader_.mark);
return false;
}
// Skip any whitespaces.
while(" \t"d.canFind(reader_.peekByte())) { reader_.forward(); }
// Encountered a non-whitespace non-linebreak character, so we're done.
if(!"\n\r\u0085\u2028\u2029"d.canFind(reader_.peek())) { break; }
const lineBreak = scanLineBreak();
anyBreaks = true;
reader_.sliceBuilder.write(lineBreak);
}
return anyBreaks;
}
/// Scan plain scalar token (no block, no quotes).
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
Token scanPlain() @safe
{
// We keep track of the allowSimpleKey_ flag here.
// Indentation rules are loosened for the flow context.
const startMark = reader_.mark;
Mark endMark = startMark;
const indent = indent_ + 1;
// We allow zero indentation for scalars, but then we need to check for
// document separators at the beginning of the line.
// if(indent == 0) { indent = 1; }
reader_.sliceBuilder.begin();
alias Transaction = SliceBuilder.Transaction;
Transaction spacesTransaction;
// Stop at a comment.
while(reader_.peekByte() != '#')
{
// Scan the entire plain scalar.
size_t length;
dchar c = void;
// Moved the if() out of the loop for optimization.
if(flowLevel_ == 0)
{
c = reader_.peek(length);
for(;;)
{
const cNext = reader_.peek(length + 1);
if(c.isWhiteSpace ||
(c == ':' && cNext.isWhiteSpace))
{
break;
}
++length;
c = cNext;
}
}
else
{
for(;;)
{
c = reader_.peek(length);
if(c.isWhiteSpace || ",:?[]{}"d.canFind(c))
{
break;
}
++length;
}
}
// It's not clear what we should do with ':' in the flow context.
if(flowLevel_ > 0 && c == ':' &&
!reader_.peek(length + 1).isWhiteSpace &&
!",[]{}"d.canFind(reader_.peek(length + 1)))
{
// This is an error; throw the slice away.
spacesTransaction.commit();
reader_.sliceBuilder.finish();
reader_.forward(length);
error("While scanning a plain scalar", startMark,
"found unexpected ':' . Please check " ~
"http://pyyaml.org/wiki/YAMLColonInFlowContext for details.",
reader_.mark);
return Token.init;
}
if(length == 0) { break; }
allowSimpleKey_ = false;
reader_.sliceBuilder.write(reader_.get(length));
endMark = reader_.mark;
spacesTransaction.commit();
spacesTransaction = Transaction(&reader_.sliceBuilder);
const startLength = reader_.sliceBuilder.length;
scanPlainSpacesToSlice();
if(startLength == reader_.sliceBuilder.length ||
(flowLevel_ == 0 && reader_.column < indent))
{
break;
}
}
spacesTransaction.end();
char[] slice = reader_.sliceBuilder.finish();
return scalarToken(startMark, endMark, slice, ScalarStyle.plain);
}
/// Scan spaces in a plain scalar.
///
/// Assumes that the caller is building a slice in Reader, and puts the spaces
/// into that slice.
void scanPlainSpacesToSlice() @safe
{
// The specification is really confusing about tabs in plain scalars.
// We just forbid them completely. Do not use tabs in YAML!
// Get as many plain spaces as there are.
size_t length;
while(reader_.peekByte(length) == ' ') { ++length; }
char[] whitespaces = reader_.prefixBytes(length);
reader_.forward(length);
const dchar c = reader_.peek();
if(!c.isNSChar)
{
// We have spaces, but no newline.
if(whitespaces.length > 0) { reader_.sliceBuilder.write(whitespaces); }
return;
}
// Newline after the spaces (if any)
const lineBreak = scanLineBreak();
allowSimpleKey_ = true;
static bool end(Reader reader_) @safe pure
{
const prefix = reader_.prefix(3);
return ("---" == prefix || "..." == prefix)
&& " \t\0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek(3));
}
if(end(reader_)) { return; }
bool extraBreaks;
alias Transaction = SliceBuilder.Transaction;
auto transaction = Transaction(&reader_.sliceBuilder);
if(lineBreak != '\n') { reader_.sliceBuilder.write(lineBreak); }
while(reader_.peek().isNSChar)
{
if(reader_.peekByte() == ' ') { reader_.forward(); }
else
{
const lBreak = scanLineBreak();
extraBreaks = true;
reader_.sliceBuilder.write(lBreak);
if(end(reader_)) { return; }
}
}
transaction.commit();
// No line breaks, only a space.
if(lineBreak == '\n' && !extraBreaks) { reader_.sliceBuilder.write(' '); }
}
/// Scan handle of a tag token.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
void scanTagHandleToSlice(string name)(const Mark startMark)
{
dchar c = reader_.peek();
enum contextMsg = "While scanning a " ~ name;
if(c != '!')
{
error(contextMsg, startMark, expected("'!'", c), reader_.mark);
return;
}
uint length = 1;
c = reader_.peek(length);
if(c != ' ')
{
while(c.isAlphaNum || "-_"d.canFind(c))
{
++length;
c = reader_.peek(length);
}
if(c != '!')
{
reader_.forward(length);
error(contextMsg, startMark, expected("'!'", c), reader_.mark);
return;
}
++length;
}
reader_.sliceBuilder.write(reader_.get(length));
}
/// Scan URI in a tag token.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
void scanTagURIToSlice(string name)(const Mark startMark)
{
// Note: we do not check if URI is well-formed.
dchar c = reader_.peek();
const startLen = reader_.sliceBuilder.length;
{
uint length;
while(c.isAlphaNum || c.isURIChar)
{
if(c == '%')
{
auto chars = reader_.get(length);
reader_.sliceBuilder.write(chars);
length = 0;
scanURIEscapesToSlice!name(startMark);
if(error_) { return; }
}
else { ++length; }
c = reader_.peek(length);
}
if(length > 0)
{
auto chars = reader_.get(length);
reader_.sliceBuilder.write(chars);
length = 0;
}
}
// OK if we scanned something, error otherwise.
if(reader_.sliceBuilder.length > startLen) { return; }
enum contextMsg = "While parsing a " ~ name;
error(contextMsg, startMark, expected("URI", c), reader_.mark);
}
// Not @nogc yet because std.utf.decode is not @nogc
/// Scan URI escape sequences.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
void scanURIEscapesToSlice(string name)(const Mark startMark)
{
import core.exception : UnicodeException;
// URI escapes encode a UTF-8 string. We store UTF-8 code units here for
// decoding into UTF-32.
Appender!string buffer;
enum contextMsg = "While scanning a " ~ name;
while(reader_.peekByte() == '%')
{
reader_.forward();
char[2] nextByte = [reader_.peekByte(), reader_.peekByte(1)];
if(!nextByte[0].isHexDigit || !nextByte[1].isHexDigit)
{
auto msg = expected("URI escape sequence of 2 hexadecimal " ~
"numbers", nextByte);
error(contextMsg, startMark, msg, reader_.mark);
return;
}
buffer ~= nextByte[].to!ubyte(16);
reader_.forward(2);
}
try
{
foreach (dchar chr; buffer.data)
{
reader_.sliceBuilder.write(chr);
}
}
catch (UnicodeException)
{
error(contextMsg, startMark,
"Invalid UTF-8 data encoded in URI escape sequence",
reader_.mark);
return;
}
}
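// For example (a sketch): the URI escapes "%C3%A9" decode to the UTF-8 bytes
// 0xC3 0xA9, i.e. the single code point 'é', which is what gets written to
// the slice.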
/// Scan a line break, if any.
///
/// Transforms:
/// '\r\n' : '\n'
/// '\r' : '\n'
/// '\n' : '\n'
/// '\u0085' : '\n'
/// '\u2028' : '\u2028'
/// '\u2029' : '\u2029'
/// no break : '\0'
dchar scanLineBreak() @safe
{
// Fast path for ASCII line breaks.
const b = reader_.peekByte();
if(b < 0x80)
{
if(b == '\n' || b == '\r')
{
if(reader_.prefix(2) == "\r\n") { reader_.forward(2); }
else { reader_.forward(); }
return '\n';
}
return '\0';
}
const c = reader_.peek();
if(c == '\x85')
{
reader_.forward();
return '\n';
}
if(c == '\u2028' || c == '\u2029')
{
reader_.forward();
return c;
}
return '\0';
}
}
private:
/// A nothrow function that converts a dchar[] to a string.
string utf32To8(C)(C[] str)
if(is(Unqual!C == dchar))
{
try { return str.to!string; }
catch(ConvException e) { assert(false, "Unexpected invalid UTF-32 string"); }
catch(Exception e) { assert(false, "Unexpected exception during UTF-8 encoding"); }
}