diff --git a/meson.build b/meson.build index 82a6252..ae95b03 100644 --- a/meson.build +++ b/meson.build @@ -18,13 +18,11 @@ dyaml_src = [ 'source/dyaml/escapes.d', 'source/dyaml/event.d', 'source/dyaml/exception.d', - 'source/dyaml/fastcharsearch.d', 'source/dyaml/flags.d', 'source/dyaml/hacks.d', 'source/dyaml/linebreak.d', 'source/dyaml/loader.d', 'source/dyaml/node.d', - 'source/dyaml/nogcutil.d', 'source/dyaml/package.d', 'source/dyaml/parser.d', 'source/dyaml/queue.d', @@ -46,8 +44,7 @@ dyaml_src = [ 'source/dyaml/test/representer.d', 'source/dyaml/test/resolver.d', 'source/dyaml/test/tokens.d', - 'source/dyaml/token.d', - 'source/dyaml/unused.d' + 'source/dyaml/token.d' ] install_subdir('source/dyaml', install_dir: 'include/d/yaml/') diff --git a/source/dyaml/constructor.d b/source/dyaml/constructor.d index 58276b1..2396f6c 100644 --- a/source/dyaml/constructor.d +++ b/source/dyaml/constructor.d @@ -100,17 +100,6 @@ final class Constructor addConstructorScalar("tag:yaml.org,2002:merge", &constructMerge); } - /// Destroy the constructor. - @nogc pure @safe nothrow ~this() - { - fromScalar_.destroy(); - fromScalar_ = null; - fromSequence_.destroy(); - fromSequence_ = null; - fromMapping_.destroy(); - fromMapping_ = null; - } - /** Add a constructor function from scalar. * * The function must take a reference to $(D Node) to construct from. diff --git a/source/dyaml/emitter.d b/source/dyaml/emitter.d index 6fba0fa..0df59a5 100644 --- a/source/dyaml/emitter.d +++ b/source/dyaml/emitter.d @@ -29,7 +29,6 @@ import dyaml.encoding; import dyaml.escapes; import dyaml.event; import dyaml.exception; -import dyaml.fastcharsearch; import dyaml.flags; import dyaml.linebreak; import dyaml.queue; @@ -61,11 +60,13 @@ struct ScalarAnalysis "allowSingleQuoted", "allowDoubleQuoted", "allowBlock", "isNull") flags; } -///Quickly determines if a character is a newline. -private mixin FastCharSearch!"\n\u0085\u2028\u2029"d newlineSearch_; +private alias isNewLine = among!('\n', '\u0085', '\u2028', '\u2029'); -// override the canFind added by the FastCharSearch mixins -private alias canFind = std.algorithm.canFind; +private alias isSpecialChar = among!('#', ',', '[', ']', '{', '}', '&', '*', '!', '|', '>', '\\', '\'', '"', '%', '@', '`'); + +private alias isFlowIndicator = among!(',', '?', '[', ']', '{', '}'); + +private alias isSpace = among!('\0', '\n', '\r', '\u0085', '\u2028', '\u2029', ' ', '\t'); //Emits YAML events into a file/stream. struct Emitter @@ -1036,14 +1037,11 @@ struct Emitter foreach(const size_t index, const dchar c; scalar) { - mixin FastCharSearch!("#,[]{}&*!|>\'\"%@`"d, 128) specialCharSearch; - mixin FastCharSearch!(",?[]{}"d, 128) flowIndicatorSearch; - //Check for indicators. if(index == 0) { //Leading indicators are special characters. - if(specialCharSearch.canFind(c)) + if(c.isSpecialChar) { flowIndicators = blockIndicators = true; } @@ -1060,7 +1058,7 @@ struct Emitter else { //Some indicators cannot appear within a scalar as well. - if(flowIndicatorSearch.canFind(c)){flowIndicators = true;} + if(c.isFlowIndicator){flowIndicators = true;} if(c == ':') { flowIndicators = true; @@ -1073,7 +1071,7 @@ struct Emitter } //Check for line breaks, special, and unicode characters. - if(newlineSearch_.canFind(c)){lineBreaks = true;} + if(c.isNewLine){lineBreaks = true;} if(!(c == '\n' || (c >= '\x20' && c <= '\x7E')) && !((c == '\u0085' || (c >= '\xA0' && c <= '\uD7FF') || (c >= '\uE000' && c <= '\uFFFD')) && c != '\uFEFF')) @@ -1090,7 +1088,7 @@ struct Emitter previousSpace = true; previousBreak = false; } - else if(newlineSearch_.canFind(c)) + else if(c.isNewLine) { if(index == 0){leadingBreak = true;} if(index == scalar.length - 1){trailingBreak = true;} @@ -1103,11 +1101,10 @@ struct Emitter previousSpace = previousBreak = false; } - mixin FastCharSearch! "\0\n\r\u0085\u2028\u2029 \t"d spaceSearch; //Prepare for the next character. - preceededByWhitespace = spaceSearch.canFind(c); + preceededByWhitespace = c.isSpace != 0; followedByWhitespace = index + 2 >= scalar.length || - spaceSearch.canFind(scalar[index + 2]); + scalar[index + 2].isSpace; } with(analysis.flags) @@ -1347,14 +1344,14 @@ struct ScalarWriter } else if(breaks_) { - if(!newlineSearch_.canFind(c)) + if(!c.isNewLine) { writeStartLineBreak(); writeLineBreaks(); emitter_.writeIndent(); } } - else if((c == dcharNone || c == '\'' || c == ' ' || newlineSearch_.canFind(c)) + else if((c == dcharNone || c == '\'' || c == ' ' || c.isNewLine) && startChar_ < endChar_) { writeCurrentRange(Flag!"UpdateColumn".yes); @@ -1448,7 +1445,7 @@ struct ScalarWriter const dchar c = nextChar(); if(breaks_) { - if(!newlineSearch_.canFind(c)) + if(!c.isNewLine) { if(!leadingSpace && c != dcharNone && c != ' ') { @@ -1471,7 +1468,7 @@ struct ScalarWriter writeCurrentRange(Flag!"UpdateColumn".yes); } } - else if(c == dcharNone || newlineSearch_.canFind(c) || c == ' ') + else if(c == dcharNone || c.isNewLine || c == ' ') { writeCurrentRange(Flag!"UpdateColumn".yes); if(c == dcharNone){emitter_.writeLineBreak();} @@ -1492,13 +1489,13 @@ struct ScalarWriter const dchar c = nextChar(); if(breaks_) { - if(!newlineSearch_.canFind(c)) + if(!c.isNewLine) { writeLineBreaks(); if(c != dcharNone){emitter_.writeIndent();} } } - else if(c == dcharNone || newlineSearch_.canFind(c)) + else if(c == dcharNone || c.isNewLine) { writeCurrentRange(Flag!"UpdateColumn".no); if(c == dcharNone){emitter_.writeLineBreak();} @@ -1538,14 +1535,14 @@ struct ScalarWriter } else if(breaks_) { - if(!newlineSearch_.canFind(c)) + if(!c.isNewLine) { writeStartLineBreak(); writeLineBreaks(); writeIndent(Flag!"ResetSpace".yes); } } - else if(c == dcharNone || newlineSearch_.canFind(c) || c == ' ') + else if(c == dcharNone || c.isNewLine || c == ' ') { writeCurrentRange(Flag!"UpdateColumn".yes); } @@ -1600,15 +1597,15 @@ struct ScalarWriter const last = lastChar(text_, end); const secondLast = end > 0 ? lastChar(text_, end) : 0; - if(newlineSearch_.canFind(text_[0]) || text_[0] == ' ') + if(text_[0].isNewLine || text_[0] == ' ') { hints[hintsIdx++] = cast(char)('0' + bestIndent); } - if(!newlineSearch_.canFind(last)) + if(!last.isNewLine) { hints[hintsIdx++] = '-'; } - else if(std.utf.count(text_) == 1 || newlineSearch_.canFind(secondLast)) + else if(std.utf.count(text_) == 1 || secondLast.isNewLine) { hints[hintsIdx++] = '+'; } @@ -1680,7 +1677,7 @@ struct ScalarWriter void updateBreaks(in dchar c, const Flag!"UpdateSpaces" updateSpaces) pure @safe { if(c == dcharNone){return;} - breaks_ = newlineSearch_.canFind(c); + breaks_ = (c.isNewLine != 0); if(updateSpaces){spaces_ = c == ' ';} } diff --git a/source/dyaml/fastcharsearch.d b/source/dyaml/fastcharsearch.d deleted file mode 100644 index 8d524fd..0000000 --- a/source/dyaml/fastcharsearch.d +++ /dev/null @@ -1,95 +0,0 @@ - -// Copyright Ferdinand Majerech 2011. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -module dyaml.fastcharsearch; - - -import std.algorithm; -import std.conv; - - -package: - -/** - * Mixin used for fast searching for a character in string. - * - * Creates a lookup table to quickly determine if a character - * is present in the string. Size of the lookup table is limited; - * any characters not represented in the table will be checked - * by ordinary equality comparison. - * - * Params: chars = String to search in. - * tableSize = Maximum number of bytes used by the table. - * - * Generated method: - * bool canFind(dchar c) - * - * Determines if a character is in the string. - */ -template FastCharSearch(dstring chars, uint tableSize = 256) -{ - private mixin(searchCode!(chars, tableSize)()); -} - -/// Generate the search table and the canFind method. -string searchCode(dstring chars, uint tableSize)() -{ - import std.string; - - const tableSizeStr = tableSize.to!string; - ubyte[tableSize] table; - table[] = 0; - - //Characters that don't fit in the table. - dchar[] specialChars; - - foreach(c; chars) - { - if(c < tableSize) { table[c] = 1; } - else { specialChars ~= c; } - } - - string specialCharsCode() - { - return specialChars.map!(c => q{cast(uint)c == %s}.format(cast(uint)c)).join(q{ || }); - } - - const caseInTable = - q{ - if(c < %s) - { - return cast(immutable(bool))table_[c]; - } - }.format(tableSize); - - string code; - if(tableSize) - { - code ~= - q{ - static immutable ubyte[%s] table_ = [ - %s]; - }.format(tableSize, table[].map!(c => c ? q{true} : q{false}).join(q{, })); - } - code ~= - q{ - bool canFind(const dchar c) @safe pure nothrow @nogc - { - %s - - return %s; - } - }.format(tableSize ? caseInTable : "", - specialChars.length ? specialCharsCode() : q{false}); - - return code; -} - -@safe unittest -{ - mixin FastCharSearch!("+", 128) search; - assert(search.canFind('+')); -} diff --git a/source/dyaml/loader.d b/source/dyaml/loader.d index 2e07b2d..dcd4da8 100644 --- a/source/dyaml/loader.d +++ b/source/dyaml/loader.d @@ -126,13 +126,6 @@ struct Loader } } - /// Destroy the Loader. - @trusted ~this() - { - reader_.destroy(); - scanner_.destroy(); - parser_.destroy(); - } /// Set stream _name. Used in debugging messages. void name(string name) pure @safe nothrow @nogc diff --git a/source/dyaml/nogcutil.d b/source/dyaml/nogcutil.d deleted file mode 100644 index 8b2bcf6..0000000 --- a/source/dyaml/nogcutil.d +++ /dev/null @@ -1,451 +0,0 @@ -// Copyright Ferdinand Majerech 2014, Digital Mars 2000-2012, Andrei Alexandrescu 2008- and Jonathan M Davis 2011-. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - - -/// @nogc versions of or alternatives to Phobos functions that are not yet @nogc and -/// wrappers to simplify their use. -module dyaml.nogcutil; - - - -import std.traits; -import std.typecons; -import std.typetuple; -import std.range; - - - -/// A NoGC version of std.conv.parse for integer types. -/// -/// Differences: -/// overflow parameter - bool set to true if there was integer overflow. -/// Asserts that at least one character was parsed instead of throwing an exception. -/// The caller must validate the inputs before calling parseNoGC. -Target parseNoGC(Target, Source)(ref Source s, uint radix, out bool overflow) - @safe pure nothrow @nogc - if (isSomeChar!(ElementType!Source) && - isIntegral!Target && !is(Target == enum)) -in { assert(radix >= 2 && radix <= 36); } -body -{ - immutable uint beyond = (radix < 10 ? '0' : 'a'-10) + radix; - - Target v = 0; - size_t atStart = true; - - // We can safely foreach over individual code points. - // Even with UTF-8 any digit is ASCII and anything not ASCII (such as the start of - // a UTF-8 sequence) is not a digit. - foreach(i; 0 .. s.length) - { - dchar c = s[i]; - // We can just take a char instead of decoding because anything non-ASCII is not - // going to be a decodable digit, i.e. we will end at such a byte. - if (c < '0' || c >= 0x80) - break; - if (radix < 10) - { - if (c >= beyond) - break; - } - else - { - if (c > '9') - { - c |= 0x20;//poorman's tolower - if (c < 'a' || c >= beyond) { break; } - c -= 'a'-10-'0'; - } - } - auto blah = cast(Target) (v * radix + c - '0'); - if (blah < v) - { - overflow = true; - return Target.max; - } - v = blah; - atStart = false; - } - assert(!atStart, "Nothing to parse in parse()"); - return v; -} - - -/// Buils a message to a buffer similarly to writef/writefln, but without -/// using GC. -/// -/// C snprintf would be better, but it isn't pure. -/// formattedWrite isn't completely @nogc yet (although it isn't GC-heavy). -/// -/// The user has to ensure buffer is long enough - an assert checks that we don't run -/// out of space. Currently this can only write strings and dchars. -char[] printNoGC(S...)(char[] buffer, S args) @safe pure nothrow @nogc -{ - auto appender = appenderNoGC(buffer); - - foreach(arg; args) - { - alias A = typeof(arg); - static if(is(A == char[]) || is(A == string)) { appender.put(arg); } - else static if(is(Unqual!A == dchar)) { appender.putDChar(arg); } - else static assert(false, "printNoGC does not support " ~ A.stringof); - } - - return appender.data; -} - - -/// A UFCS utility function to write a dchar to an AppenderNoGCFixed using writeDCharTo. -/// -/// The char $(B must) be a valid dchar. -void putDChar(ref AppenderNoGCFixed!(char[], char) appender, dchar c) - @safe pure nothrow @nogc -{ - char[4] dcharBuf; - if(c < 0x80) - { - dcharBuf[0] = cast(char)c; - appender.put(dcharBuf[0 .. 1]); - return; - } - // Should be safe to use as the first thing Reader does is validate everything. - const bytes = encodeValidCharNoGC(dcharBuf, c); - appender.put(dcharBuf[0 .. bytes]); -} - -/// Convenience function that returns an $(D AppenderNoGCFixed!A) using with $(D array) -/// for storage. -AppenderNoGCFixed!(E[]) appenderNoGC(A : E[], E)(A array) -{ - return AppenderNoGCFixed!(E[])(array); -} - -/// A gutted, NoGC version of std.array.appender. -/// -/// Works on a fixed-size buffer. -struct AppenderNoGCFixed(A : T[], T) -{ - import std.array; - - private struct Data - { - size_t capacity; - Unqual!T[] arr; - bool canExtend = false; - } - - private Data _data; - - @nogc: - - /// Construct an appender that will work with given buffer. - /// - /// Data written to the appender will overwrite the buffer from the start. - this(T[] arr) @safe pure nothrow - { - // initialize to a given array. - _data.arr = cast(Unqual!T[])arr[0 .. 0]; //trusted - _data.capacity = arr.length; - } - - /** - * Returns the capacity of the array (the maximum number of elements the - * managed array can accommodate before triggering a reallocation). If any - * appending will reallocate, $(D capacity) returns $(D 0). - */ - @property size_t capacity() const @safe pure nothrow - { - return _data.capacity; - } - - /** - * Returns the managed array. - */ - @property inout(T)[] data() inout @safe pure nothrow - { - return cast(typeof(return))(_data.arr); - } - - // ensure we can add nelems elements, resizing as necessary - private void ensureAddable(size_t nelems) @safe pure nothrow - { - assert(_data.capacity >= _data.arr.length + nelems, - "AppenderFixed ran out of space"); - } - - void put(U)(U[] items) if (is(Unqual!U == T)) - { - // make sure we have enough space, then add the items - ensureAddable(items.length); - immutable len = _data.arr.length; - immutable newlen = len + items.length; - - auto bigDataFun() @trusted nothrow { return _data.arr.ptr[0 .. newlen];} - auto bigData = bigDataFun(); - - alias UT = Unqual!T; - - bigData[len .. newlen] = items[]; - - //We do this at the end, in case of exceptions - _data.arr = bigData; - } - - // only allow overwriting data on non-immutable and non-const data - static if (isMutable!T) - { - /** - * Clears the managed array. This allows the elements of the array to be reused - * for appending. - * - * Note that clear is disabled for immutable or const element types, due to the - * possibility that $(D AppenderNoGCFixed) might overwrite immutable data. - */ - void clear() @safe pure nothrow - { - _data.arr = ()@trusted{ return _data.arr.ptr[0 .. 0]; }(); - } - } - else - { - /// Clear is not available for const/immutable data. - @disable void clear(); - } -} -@safe unittest -{ - char[256] buffer; - auto appender = appenderNoGC(buffer[]); - appender.put("found unsupported escape character: "); - appender.putDChar('a'); - appender.putDChar('á'); - assert(appender.data == "found unsupported escape character: aá"); -} - - -/// Result of a validateUTF8NoGC call. -struct ValidateResult -{ - /// Is the validated string valid? - bool valid; - /// Number of characters in the string. - /// - /// If the string is not valid, this is the number of valid characters before - /// hitting the first invalid sequence. - size_t characterCount; - /// If the string is not valid, error message with details is here. - string msg; -} - -/// Validate a UTF-8 string, checking if it is well-formed Unicode. -/// -/// See_Also: ValidateResult -ValidateResult validateUTF8NoGC(const(char[]) str) @safe pure nothrow @nogc -{ - immutable len = str.length; - size_t characterCount; - outer: for (size_t index = 0; index < len; ) - { - if(str[index] < 0x80) - { - ++index; - ++characterCount; - continue; - } - - auto decoded = decodeUTF8NoGC!(No.validated)(str, index); - if(decoded.errorMessage !is null) - { - return ValidateResult(false, characterCount, decoded.errorMessage); - } - ++characterCount; - } - - return ValidateResult(true, characterCount); -} - -/// @nogc version of std.utf.decode() for char[]. -/// -/// The caller $(B must) handle ASCII (< 0x80) characters manually; this is asserted to -/// force code using this function to be efficient. -/// -/// Params: -/// -/// validated = If ture, assume str is a valid UTF-8 string and don't generate any -/// error-checking code. If validated is true, str $(B must) be a valid -/// character, otherwise undefined behavior will occur. Also affects the -/// return type. -/// str = Will decode the first code point from this string. -/// index = Index in str where the code point starts. Will be updated to point to -/// the next code point. -/// -/// Returns: If validated is true, the decoded character. -/// Otherwise a struct with a 'decoded' member - the decoded character, and a -/// 'string errorMessage' member that is null on success and otherwise stores -/// the error message. -auto decodeUTF8NoGC(Flag!"validated" validated)(const(char[]) str, ref size_t index) -{ - static if(!validated) struct Result - { - dchar decoded; - string errorMessage; - } - else alias Result = dchar; - - /// Dchar bitmask for different numbers of UTF-8 code units. - enum bitMask = tuple((1 << 7) - 1, (1 << 11) - 1, (1 << 16) - 1, (1 << 21) - 1); - - auto pstr = str[index..$]; - - immutable length = str.length - index; - ubyte fst = pstr[0]; - - assert(fst & 0x80); - enum invalidUTFMsg = "Invalid UTF-8 sequence"; - static if(!validated) { enum invalidUTF = Result(cast(dchar)int.max, invalidUTFMsg); } - - // starter must have at least 2 first bits set - static if(validated) - { - assert((fst & 0b1100_0000) == 0b1100_0000, invalidUTFMsg); - } - else if((fst & 0b1100_0000) != 0b1100_0000) - { - return invalidUTF; - } - - ubyte tmp = void; - dchar d = fst; // upper control bits are masked out later - fst <<= 1; - - - foreach (i; TypeTuple!(1, 2, 3)) - { - static if(validated) { assert(i != length, "Decoding out of bounds"); } - else if(i == length) { return Result(cast(dchar)int.max, "Decoding out of bounds"); } - - tmp = pstr[i]; - static if(validated) { assert((tmp & 0xC0) == 0x80, invalidUTFMsg); } - else if((tmp & 0xC0) != 0x80) { return invalidUTF; } - - d = (d << 6) | (tmp & 0x3F); - fst <<= 1; - - if (!(fst & 0x80)) // no more bytes - { - d &= bitMask[i]; // mask out control bits - - // overlong, could have been encoded with i bytes - static if(validated) { assert((d & ~bitMask[i - 1]) != 0, invalidUTFMsg); } - else if((d & ~bitMask[i - 1]) == 0) { return invalidUTF; } - - // check for surrogates only needed for 3 bytes - static if (i == 2) - { - static if(validated) { assert(isValidDchar(d), invalidUTFMsg); } - else if(!isValidDchar(d)) { return invalidUTF; } - } - - index += i + 1; - static if (i == 3) - { - static if(validated) { assert(d <= dchar.max, invalidUTFMsg); } - else if(d > dchar.max) { return invalidUTF; } - } - - return Result(d); - } - } - - static if(validated) { assert(false, invalidUTFMsg); } - else { return invalidUTF; } -} - -/// ditto -alias decodeValidUTF8NoGC = decodeUTF8NoGC!(Yes.validated); - -/// @nogc version of std.utf.encode() for char[]. -/// -/// The caller $(B must) handle ASCII (< 0x80) characters manually; this is asserted to -/// force code using this function to be efficient. -/// -/// Params: -/// validated = If true, asssume c is a valid, non-surrogate UTF-32 code point and don't -/// generate any error-checking code. If validated is true, c $(B must) be -/// a valid character, otherwise undefined behavior will occur. Also affects -/// the return type. -/// buf = Buffer to write the encoded result to. -/// c = Character to encode. -/// -/// Returns: If validated is true, number of bytes the encoded character takes up in buf. -/// Otherwise a struct with a 'bytes' member specifying the number of bytes of -/// the endocded character, and a 'string errorMessage' member that is null -/// if there was no error and otherwise stores the error message. -auto encodeCharNoGC(Flag!"validated" validated)(ref char[4] buf, dchar c) - @safe pure nothrow @nogc -{ - static if(!validated) struct Result - { - size_t bytes; - string errorMessage; - } - else alias Result = size_t; - - // Force the caller to optimize ASCII (the 1-byte case) - assert(c >= 0x80, "Caller should explicitly handle ASCII chars"); - if (c <= 0x7FF) - { - assert(isValidDchar(c)); - buf[0] = cast(char)(0xC0 | (c >> 6)); - buf[1] = cast(char)(0x80 | (c & 0x3F)); - return Result(2); - } - if (c <= 0xFFFF) - { - static if(validated) - { - assert(0xD800 > c || c > 0xDFFF, - "Supposedly valid code point is a surrogate code point"); - } - else if(0xD800 <= c && c <= 0xDFFF) - { - return Result(size_t.max, "Can't encode a surrogate code point in UTF-8"); - } - - assert(isValidDchar(c)); - buf[0] = cast(char)(0xE0 | (c >> 12)); - buf[1] = cast(char)(0x80 | ((c >> 6) & 0x3F)); - buf[2] = cast(char)(0x80 | (c & 0x3F)); - return Result(3); - } - if (c <= 0x10FFFF) - { - assert(isValidDchar(c)); - buf[0] = cast(char)(0xF0 | (c >> 18)); - buf[1] = cast(char)(0x80 | ((c >> 12) & 0x3F)); - buf[2] = cast(char)(0x80 | ((c >> 6) & 0x3F)); - buf[3] = cast(char)(0x80 | (c & 0x3F)); - return Result(4); - } - - assert(!isValidDchar(c)); - static if(!validated) - { - return Result(size_t.max, "Can't encode an invalid code point in UTF-8"); - } - else - { - assert(false, "Supposedly valid code point is invalid"); - } -} - -/// ditto -alias encodeValidCharNoGC = encodeCharNoGC!(Yes.validated); - -/// @nogc version of std.utf.isValidDchar -bool isValidDchar(dchar c) @safe pure nothrow @nogc -{ - return c < 0xD800 || (c > 0xDFFF && c <= 0x10FFFF); -} diff --git a/source/dyaml/parser.d b/source/dyaml/parser.d index 578f205..44437d6 100644 --- a/source/dyaml/parser.d +++ b/source/dyaml/parser.d @@ -142,16 +142,6 @@ final class Parser marks_.reserve(32); } - ///Destroy the parser. - @trusted ~this() - { - currentEvent_.destroy(); - tagDirectives_.destroy(); - tagDirectives_ = null; - states_.destroy(); - marks_.destroy(); - } - /** * Check if the next event is one of specified types. * @@ -538,8 +528,7 @@ final class Parser { string notInPlace; bool inEscape = false; - import dyaml.nogcutil; - auto appender = appenderNoGC(cast(char[])tokenValue); + auto appender = appender!(char[])(); for(char[] oldValue = tokenValue; !oldValue.empty();) { const dchar c = oldValue.front(); @@ -549,7 +538,7 @@ final class Parser { if(c != '\\') { - if(notInPlace is null) { appender.putDChar(c); } + if(notInPlace is null) { appender.put(c); } else { notInPlace ~= c; } continue; } @@ -570,7 +559,7 @@ final class Parser // many-byte unicode chars if(c != 'L' && c != 'P') { - appender.putDChar(dyaml.escapes.fromEscape(c)); + appender.put(dyaml.escapes.fromEscape(c)); continue; } // Need to duplicate as we won't fit into @@ -596,10 +585,8 @@ final class Parser assert(!hex.canFind!(d => !d.isHexDigit), "Scanner must ensure the hex string is valid"); - bool overflow; - const decoded = cast(dchar)parseNoGC!int(hex, 16u, overflow); - assert(!overflow, "Scanner must ensure there's no overflow"); - if(notInPlace is null) { appender.putDChar(decoded); } + const decoded = cast(dchar)parse!int(hex, 16u); + if(notInPlace is null) { appender.put(decoded); } else { notInPlace ~= decoded; } continue; } diff --git a/source/dyaml/queue.d b/source/dyaml/queue.d index 9ce87b9..5024489 100644 --- a/source/dyaml/queue.d +++ b/source/dyaml/queue.d @@ -47,9 +47,6 @@ struct Queue(T) /// Cursor pointing to the current node in iteration. Node* cursor_ = null; - /// The first element of a linked list of freed Nodes available for recycling. - Node* freeList_ = null; - /// Length of the queue. size_t length_ = 0; @@ -58,20 +55,6 @@ struct Queue(T) @disable bool opEquals(ref Queue); @disable int opCmp(ref Queue); - /// Destroy the queue, deallocating all its elements. - @trusted nothrow ~this() - { - while(!empty) { pop(); } - while(freeList_ !is null) - { - auto toFree = freeList_; - freeList_ = toFree.next_; - free(toFree); - } - cursor_ = last_ = first_ = null; - length_ = 0; - } - /// Start iterating over the queue. void startIteration() @safe pure nothrow @nogc { @@ -101,7 +84,7 @@ struct Queue(T) /// Push new item to the queue. void push(T item) @safe nothrow { - Node* newLast = newNode(item, null); + Node* newLast = new Node(item, null); if(last_ !is null) { last_.next_ = newLast; } if(first_ is null) { first_ = newLast; } last_ = newLast; @@ -118,7 +101,7 @@ struct Queue(T) { if(idx == 0) { - first_ = newNode(item, first_); + first_ = new Node(item, first_); ++length_; } // Adding before last added element, so we can just push. @@ -130,7 +113,7 @@ struct Queue(T) foreach(i; 1 .. idx) { current = current.next_; } // Insert a new node after current, and put current.next_ behind it. - current.next_ = newNode(item, current.next_); + current.next_ = new Node(item, current.next_); ++length_; } } @@ -147,9 +130,6 @@ struct Queue(T) Node* popped = first_; first_ = first_.next_; - Node* oldFree = freeList_; - freeList_ = popped; - freeList_.next_ = oldFree; if(--length_ == 0) { assert(first_ is null); @@ -181,43 +161,6 @@ struct Queue(T) { return length_; } - -private: - /// Get a new (or recycled) node with specified item and next node pointer. - /// - /// Tries to reuse a node from freeList_, allocates a new node if not possible. - Node* newNode(ref T item, Node* next) @trusted nothrow - { - if(freeList_ !is null) - { - auto node = freeList_; - freeList_ = freeList_.next_; - *node = Node(item, next); - return node; - } - return allocate!Node(item, next); - } -} - - -private: - -/// Allocate a struct, passing arguments to its constructor or default initializer. -T* allocate(T, Args...)(Args args) @system nothrow -{ - T* ptr = cast(T*)malloc(T.sizeof); - *ptr = T(args); - // The struct might contain references to GC-allocated memory, so tell the GC about it. - static if(hasIndirections!T) { GC.addRange(cast(void*)ptr, T.sizeof); } - return ptr; -} - -/// Deallocate struct pointed at by specified pointer. -void free(T)(T* ptr) @system nothrow -{ - // GC doesn't need to care about any references in this struct anymore. - static if(hasIndirections!T) { GC.removeRange(cast(void*)ptr); } - core.stdc.stdlib.free(ptr); } @safe unittest diff --git a/source/dyaml/reader.d b/source/dyaml/reader.d index 7ca242a..82e0a3d 100644 --- a/source/dyaml/reader.d +++ b/source/dyaml/reader.d @@ -22,12 +22,10 @@ import std.utf; import tinyendian; -import dyaml.fastcharsearch; import dyaml.encoding; import dyaml.exception; -import dyaml.nogcutil; - +alias isBreak = among!('\n', '\u0085', '\u2028', '\u2029'); package: @@ -133,7 +131,7 @@ final class Reader /// // XXX removed; search for 'risky' to find why. // Throws: ReaderException if trying to read past the end of the buffer. - dchar peek(const size_t index) @safe pure nothrow @nogc + dchar peek(const size_t index) @safe pure { if(index < upcomingASCII_) { return buffer_[bufferOffset_ + index]; } if(characterCount_ <= charIndex_ + index) @@ -159,7 +157,7 @@ final class Reader ++lastDecodedBufferOffset_; return b; } - return decodeValidUTF8NoGC(buffer_, lastDecodedBufferOffset_); + return decode(buffer_, lastDecodedBufferOffset_); } // 'Slow' path where we decode everything up to the requested character. @@ -176,7 +174,7 @@ final class Reader } /// Optimized version of peek() for the case where peek index is 0. - dchar peek() @safe pure nothrow @nogc + dchar peek() @safe pure { if(upcomingASCII_ > 0) { return buffer_[bufferOffset_]; } if(characterCount_ <= charIndex_) { return '\0'; } @@ -216,7 +214,7 @@ final class Reader /// slice will be shorter. /// /// Returns: Characters starting at current position or an empty slice if out of bounds. - char[] prefix(const size_t length) @safe pure nothrow @nogc + char[] prefix(const size_t length) @safe pure { return slice(length); } @@ -249,7 +247,7 @@ final class Reader /// be shorter. /// /// Returns: Slice into the internal buffer or an empty slice if out of bounds. - char[] slice(const size_t end) @safe pure nothrow @nogc + char[] slice(const size_t end) @safe pure { // Fast path in case the caller has already peek()ed all the way to end. if(end == lastDecodedCharOffset_) @@ -277,7 +275,7 @@ final class Reader /// /// Throws: ReaderException if trying to read past the end of the buffer /// or if invalid data is read. - dchar get() @safe pure nothrow @nogc + dchar get() @safe pure { const result = peek(); forward(); @@ -289,7 +287,7 @@ final class Reader /// Params: length = Number or characters (code points, not bytes) to get. /// /// Returns: Characters starting at current position. - char[] get(const size_t length) @safe pure nothrow @nogc + char[] get(const size_t length) @safe pure { auto result = slice(length); forward(length); @@ -299,10 +297,8 @@ final class Reader /// Move current position forward. /// /// Params: length = Number of characters to move position forward. - void forward(size_t length) @safe pure nothrow @nogc + void forward(size_t length) @safe pure { - mixin FastCharSearch!"\n\u0085\u2028\u2029"d search; - while(length > 0) { auto asciiToTake = min(upcomingASCII_, length); @@ -336,10 +332,10 @@ final class Reader "ASCII must be handled by preceding code"); ++charIndex_; - const c = decodeValidUTF8NoGC(buffer_, bufferOffset_); + const c = decode(buffer_, bufferOffset_); // New line. (can compare with '\n' without decoding since it's ASCII) - if(search.canFind(c) || (c == '\r' && buffer_[bufferOffset_] != '\n')) + if(c.isBreak || (c == '\r' && buffer_[bufferOffset_] != '\n')) { ++line_; column_ = 0; @@ -354,7 +350,7 @@ final class Reader } /// Move current position forward by one character. - void forward() @safe pure nothrow @nogc + void forward() @safe pure { ++charIndex_; lastDecodedBufferOffset_ = bufferOffset_; @@ -377,16 +373,15 @@ final class Reader } // UTF-8 - mixin FastCharSearch!"\n\u0085\u2028\u2029"d search; assert(bufferOffset_ < buffer_.length, "Attempted to decode past the end of YAML buffer"); assert(buffer_[bufferOffset_] >= 0x80, "ASCII must be handled by preceding code"); - const c = decodeValidUTF8NoGC(buffer_, bufferOffset_); + const c = decode(buffer_, bufferOffset_); // New line. (can compare with '\n' without decoding since it's ASCII) - if(search.canFind(c) || (c == '\r' && buffer_[bufferOffset_] != '\n')) + if(c.isBreak || (c == '\r' && buffer_[bufferOffset_] != '\n')) { ++line_; column_ = 0; @@ -425,7 +420,7 @@ private: // lastDecodedCharOffset_/lastDecodedBufferOffset_ and update them. // // Does not advance the buffer position. Used in peek() and slice(). - dchar decodeNext() @safe pure nothrow @nogc + dchar decodeNext() @safe pure { assert(lastDecodedBufferOffset_ < buffer_.length, "Attempted to decode past the end of YAML buffer"); @@ -438,7 +433,7 @@ private: return b; } - return decodeValidUTF8NoGC(buffer_, lastDecodedBufferOffset_); + return decode(buffer_, lastDecodedBufferOffset_); } } @@ -558,7 +553,7 @@ public: /// Data can only be written up to the current position in the Reader buffer. /// /// See_Also: begin - void write(dchar c) @safe pure nothrow @nogc + void write(dchar c) @safe pure { assert(inProgress, "write called without begin"); if(c < 0x80) @@ -569,7 +564,7 @@ public: // We need to encode a non-ASCII dchar into UTF-8 char[4] encodeBuf; - const bytes = encodeValidCharNoGC(encodeBuf, c); + const bytes = encode(encodeBuf, c); reader_.buffer_[end_ .. end_ + bytes] = encodeBuf[0 .. bytes]; end_ += bytes; } @@ -585,7 +580,7 @@ public: /// position = Position to insert the character at in code units, not code points. /// Must be less than slice length(); a previously returned length() /// can be used. - void insert(const dchar c, const size_t position) @safe pure nothrow @nogc + void insert(const dchar c, const size_t position) @safe pure { assert(inProgress, "insert called without begin"); assert(start_ + position <= end_, "Trying to insert after the end of the slice"); @@ -596,7 +591,7 @@ public: // Encode c into UTF-8 char[4] encodeBuf; if(c < 0x80) { encodeBuf[0] = cast(char)c; } - const size_t bytes = c < 0x80 ? 1 : encodeValidCharNoGC(encodeBuf, c); + const size_t bytes = c < 0x80 ? 1 : encode(encodeBuf, c); if(movedLength > 0) { @@ -660,16 +655,14 @@ public: } /// Destroy the transaction and revert it if it hasn't been committed yet. - /// - /// Does nothing for a default-initialized transaction. - ~this() @safe pure nothrow @nogc + void end() @safe pure nothrow @nogc { - if(builder_ is null || committed_) { return; } - assert(builder_.endStackUsed_ == stackLevel_ + 1, + assert(builder_ && builder_.endStackUsed_ == stackLevel_ + 1, "Parent transactions don't fully contain child transactions"); builder_.pop(); builder_ = null; } + } private: @@ -765,13 +758,8 @@ auto toUTF8(ubyte[] input, const UTFEncoding encoding) @safe pure nothrow continue; } - const encodeResult = encodeCharNoGC!(No.validated)(encodeBuf, c); - if(encodeResult.errorMessage !is null) - { - result.errorMessage = encodeResult.errorMessage; - return; - } - const bytes = encodeResult.bytes; + std.utf.encode(encodeBuf, c); + const bytes = codeLength!char(c); utf8[length .. length + bytes] = encodeBuf[0 .. bytes]; length += bytes; } @@ -789,14 +777,8 @@ auto toUTF8(ubyte[] input, const UTFEncoding encoding) @safe pure nothrow { case UTFEncoding.UTF_8: result.utf8 = cast(char[])input; - const validateResult = result.utf8.validateUTF8NoGC(); - if(!validateResult.valid) - { - result.errorMessage = "UTF-8 validation error after character #" ~ - validateResult.characterCount.to!string ~ ": " ~ - validateResult.msg; - } - result.characterCount = validateResult.characterCount; + result.utf8.validate(); + result.characterCount = std.utf.count(result.utf8); break; case UTFEncoding.UTF_16: assert(input.length % 2 == 0, "UTF-16 buffer size must be even"); @@ -818,7 +800,7 @@ auto toUTF8(ubyte[] input, const UTFEncoding encoding) @safe pure nothrow } /// Determine if all characters (code points, not bytes) in a string are printable. -bool isPrintableValidUTF8(const char[] chars) @safe pure nothrow @nogc +bool isPrintableValidUTF8(const char[] chars) @safe pure { // This is oversized (only 128 entries are necessary) simply because having 256 // entries improves performance... for some reason (alignment?) @@ -918,7 +900,7 @@ bool isPrintableValidUTF8(const char[] chars) @safe pure nothrow @nogc if(index == chars.length) { break; } // Not ASCII, need to decode. - const dchar c = decodeValidUTF8NoGC(chars, index); + const dchar c = decode(chars, index); // We now c is not ASCII, so only check for printable non-ASCII chars. if(!(c == 0x85 || (c >= 0xA0 && c <= '\uD7FF') || (c >= '\uE000' && c <= '\uFFFD') || diff --git a/source/dyaml/representer.d b/source/dyaml/representer.d index 0dac787..f31cd78 100644 --- a/source/dyaml/representer.d +++ b/source/dyaml/representer.d @@ -81,13 +81,6 @@ final class Representer addRepresenter!SysTime(&representSysTime); } - ///Destroy the Representer. - ~this() pure @safe nothrow - { - representers_.destroy(); - representers_ = null; - } - ///Set default _style for scalars. If style is $(D ScalarStyle.Invalid), the _style is chosen automatically. @property void defaultScalarStyle(ScalarStyle style) pure @safe nothrow { diff --git a/source/dyaml/resolver.d b/source/dyaml/resolver.d index 1d4bda8..5eae47f 100644 --- a/source/dyaml/resolver.d +++ b/source/dyaml/resolver.d @@ -68,13 +68,6 @@ final class Resolver if(defaultImplicitResolvers){addImplicitResolvers();} } - ///Destroy the Resolver. - ~this() pure @safe nothrow - { - yamlImplicitResolvers_.destroy(); - yamlImplicitResolvers_ = null; - } - /** * Add an implicit scalar resolver. * diff --git a/source/dyaml/scanner.d b/source/dyaml/scanner.d index df0b2f9..1babf99 100644 --- a/source/dyaml/scanner.d +++ b/source/dyaml/scanner.d @@ -20,11 +20,10 @@ import std.exception; import std.string; import std.typecons; import std.traits : Unqual; +import std.utf; -import dyaml.fastcharsearch; import dyaml.escapes; import dyaml.exception; -import dyaml.nogcutil; import dyaml.queue; import dyaml.reader; import dyaml.style; @@ -53,6 +52,21 @@ package: /// TAG(value) /// SCALAR(value, plain, style) +alias isBreak = among!('\0', '\n', '\r', '\u0085', '\u2028', '\u2029'); + +alias isBreakOrSpace = among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'); + +alias isWhiteSpace = among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'); + +alias isNonScalarStartCharacter = among!('-', '?', ':', ',', '[', ']', '{', '}', '#', '&', '*', '!', '|', '>', '\'', '"', '%', '@', '`', ' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'); + +alias isURIChar = among!('-', ';', '/', '?', ':', '@', '&', '=', '+', '$', ',', '_', '.', '!', '~', '*', '\'', '(', ')', '[', ']', '%'); + +alias isNSChar = among!(' ', '\n', '\r', '\u0085', '\u2028', '\u2029'); + +alias isBChar = among!('\n', '\r', '\u0085', '\u2028', '\u2029'); + +alias isFlowScalarBreakSpace = among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029', '\'', '"', '\\'); /// Marked exception thrown at scanner errors. /// @@ -153,14 +167,6 @@ final class Scanner /// not. char[256] msgBuffer_; - /// Used to detect if a character is any whitespace plus '\0' - mixin FastCharSearch!" \t\0\n\r\u0085\u2028\u2029"d searchAllWhitespace; - /// Used to detect if a character is any line break plus '\0' - mixin FastCharSearch!"\0\n\r\u0085\u2028\u2029"d searchAllBreaks; - - /// Avoids compiler confusion of std.algorithm.canFind with FastCharSearch. - alias canFind = std.algorithm.canFind; - public: /// Construct a Scanner using specified Reader. this(Reader reader) @safe nothrow @@ -170,16 +176,6 @@ final class Scanner fetchStreamStart(); } - /// Destroy the scanner. - ~this() @trusted - { - tokens_.destroy(); - indents_.destroy(); - possibleSimpleKeys_.destroy(); - possibleSimpleKeys_ = null; - reader_ = null; - } - /// Check if the next token is one of specified types. /// /// If no types are specified, checks if any tokens are left. @@ -235,7 +231,13 @@ final class Scanner /// Build an error message in msgBuffer_ and return it as a string. string buildMsg(S ...)(S args) @trusted { - return cast(string)msgBuffer_.printNoGC(args); + try { + return text(args); + } + catch (Exception) + { + return ""; + } } /// Most scanning error messages have the same format; so build them with this @@ -739,7 +741,7 @@ final class Scanner tokens_.push(plain); } - pure nothrow @nogc: + pure: ///Check if the next token is DIRECTIVE: ^ '%' ... bool checkDirective() @safe @@ -754,7 +756,7 @@ final class Scanner return reader_.column == 0 && reader_.peekByte() == '-' && reader_.prefix(3) == "---" && - searchAllWhitespace.canFind(reader_.peek(3)); + reader_.peek(3).isWhiteSpace; } /// Check if the next token is DOCUMENT-END: ^ '...' (' '|'\n') @@ -764,13 +766,13 @@ final class Scanner return reader_.column == 0 && reader_.peekByte() == '.' && reader_.prefix(3) == "..." && - searchAllWhitespace.canFind(reader_.peek(3)); + reader_.peek(3).isWhiteSpace; } /// Check if the next token is BLOCK-ENTRY: '-' (' '|'\n') bool checkBlockEntry() @safe { - return searchAllWhitespace.canFind(reader_.peek(1)); + return !!reader_.peek(1).isWhiteSpace; } /// Check if the next token is KEY(flow context): '?' @@ -778,7 +780,7 @@ final class Scanner /// or KEY(block context): '?' (' '|'\n') bool checkKey() @safe { - return (flowLevel_ > 0 || searchAllWhitespace.canFind(reader_.peek(1))); + return (flowLevel_ > 0 || reader_.peek(1).isWhiteSpace); } /// Check if the next token is VALUE(flow context): ':' @@ -786,7 +788,7 @@ final class Scanner /// or VALUE(block context): ':' (' '|'\n') bool checkValue() @safe { - return flowLevel_ > 0 || searchAllWhitespace.canFind(reader_.peek(1)); + return flowLevel_ > 0 || reader_.peek(1).isWhiteSpace; } /// Check if the next token is a plain scalar. @@ -806,13 +808,11 @@ final class Scanner bool checkPlain() @safe { const c = reader_.peek(); - mixin FastCharSearch!"-?:,[]{}#&*!|>\'\"%@` \t\0\n\r\u0085\u2028\u2029"d - searchPlainNotFirstChar; - if(!searchPlainNotFirstChar.canFind(c)) + if(!c.isNonScalarStartCharacter) { return true; } - return !searchAllWhitespace.canFind(reader_.peek(1)) && + return !reader_.peek(1).isWhiteSpace && (c == '-' || (flowLevel_ == 0 && (c == '?' || c == ':'))); } @@ -848,7 +848,7 @@ final class Scanner /// Scan and throw away all characters until next line break. void scanToNextBreak() @safe { - while(!searchAllBreaks.canFind(reader_.peek())) { reader_.forward(); } + while(!reader_.peek().isBreak) { reader_.forward(); } } /// Scan all characters until next line break. @@ -858,7 +858,7 @@ final class Scanner void scanToNextBreakToSlice() @safe { uint length = 0; - while(!searchAllBreaks.canFind(reader_.peek(length))) + while(!reader_.peek(length).isBreak) { ++length; } @@ -1072,7 +1072,7 @@ final class Scanner { findNextNonSpace(); if(reader_.peekByte() == '#') { scanToNextBreak(); } - if(searchAllBreaks.canFind(reader_.peek())) + if(reader_.peek().isBreak) { scanLineBreak(); return; @@ -1106,7 +1106,7 @@ final class Scanner char[] value = reader_.sliceBuilder.finish(); if(error_) { return Token.init; } - if(!searchAllWhitespace.canFind(reader_.peek()) && + if(!reader_.peek().isWhiteSpace && !"?:,]}%@"d.canFind(reader_.peekByte())) { enum anchorCtx = "While scanning an anchor"; @@ -1141,7 +1141,6 @@ final class Scanner // (slice) we will produce. uint handleEnd; - mixin FastCharSearch!" \0\n\r\u0085\u2028\u2029"d search; if(c == '<') { reader_.forward(2); @@ -1157,7 +1156,7 @@ final class Scanner } reader_.forward(); } - else if(searchAllWhitespace.canFind(c)) + else if(c.isWhiteSpace) { reader_.forward(); handleEnd = 0; @@ -1168,7 +1167,7 @@ final class Scanner uint length = 1; bool useHandle = false; - while(!search.canFind(c)) + while(!c.isBreakOrSpace) { if(c == '!') { @@ -1196,7 +1195,7 @@ final class Scanner if(error_) { return Token.init; } } - if(search.canFind(reader_.peek())) + if(reader_.peek().isBreakOrSpace) { char[] slice = reader_.sliceBuilder.finish(); return tagToken(startMark, reader_.mark, slice, handleEnd); @@ -1326,7 +1325,7 @@ final class Scanner // (which are at the end of the scalar). Otherwise re remove them (end the // transaction). if(chomping == Chomping.Keep) { breaksTransaction.commit(); } - else { breaksTransaction.__dtor(); } + else { breaksTransaction.end(); } if(chomping != Chomping.Strip && lineBreak != int.max) { // If chomping is Keep, we keep the line break but the first line break @@ -1435,7 +1434,7 @@ final class Scanner findNextNonSpace(); if(reader_.peekByte()== '#') { scanToNextBreak(); } - if(searchAllBreaks.canFind(reader_.peek())) + if(reader_.peek().isBreak) { scanLineBreak(); return; @@ -1527,8 +1526,6 @@ final class Scanner { dchar c = reader_.peek(); - mixin FastCharSearch!" \t\0\n\r\u0085\u2028\u2029\'\"\\"d search; - size_t numCodePoints = 0; // This is an optimized way of writing: // while(!search.canFind(reader_.peek(numCodePoints))) { ++numCodePoints; } @@ -1546,8 +1543,8 @@ final class Scanner for(size_t i = oldSliceLength; i < slice.length;) { // slice is UTF-8 - need to decode - const ch = slice[i] < 0x80 ? slice[i++] : decodeValidUTF8NoGC(slice, i); - if(search.canFind(ch)) { break outer; } + const ch = slice[i] < 0x80 ? slice[i++] : decode(slice, i); + if(ch.isFlowScalarBreakSpace) { break outer; } ++numCodePoints; } oldSliceLength = slice.length; @@ -1596,14 +1593,15 @@ final class Scanner char[2] escapeStart = ['\\', cast(char) c]; reader_.sliceBuilder.write(escapeStart); reader_.sliceBuilder.write(hex); - bool overflow; // Note: This is just error checking; Parser does the actual // escaping (otherwise we could accidentally create an // escape sequence here that wasn't in input, breaking the // escaping code in parser, which is in parser because it // can't always be done in place) - parseNoGC!int(hex, 16u, overflow); - if(overflow) + try { + parse!int(hex, 16u); + } + catch (Exception) { error("While scanning a double quoted scalar", startMark, "overflow when parsing an escape sequence of " ~ @@ -1689,7 +1687,7 @@ final class Scanner // Instead of checking indentation, we check for document separators. const prefix = reader_.prefix(3); if((prefix == "---" || prefix == "...") && - searchAllWhitespace.canFind(reader_.peek(3))) + reader_.peek(3).isWhiteSpace) { error("While scanning a quoted scalar", startMark, "found unexpected document separator", reader_.mark); @@ -1741,8 +1739,8 @@ final class Scanner for(;;) { const cNext = reader_.peek(length + 1); - if(searchAllWhitespace.canFind(c) || - (c == ':' && searchAllWhitespace.canFind(cNext))) + if(c.isWhiteSpace || + (c == ':' && cNext.isWhiteSpace)) { break; } @@ -1755,7 +1753,7 @@ final class Scanner for(;;) { c = reader_.peek(length); - if(searchAllWhitespace.canFind(c) || ",:?[]{}"d.canFind(c)) + if(c.isWhiteSpace || ",:?[]{}"d.canFind(c)) { break; } @@ -1765,7 +1763,7 @@ final class Scanner // It's not clear what we should do with ':' in the flow context. if(flowLevel_ > 0 && c == ':' && - !searchAllWhitespace.canFind(reader_.peek(length + 1)) && + !reader_.peek(length + 1).isWhiteSpace && !",[]{}"d.canFind(reader_.peek(length + 1))) { // This is an error; throw the slice away. @@ -1799,7 +1797,7 @@ final class Scanner } } - spacesTransaction.__dtor(); + spacesTransaction.end(); char[] slice = reader_.sliceBuilder.finish(); return scalarToken(startMark, endMark, slice, ScalarStyle.Plain); @@ -1821,10 +1819,7 @@ final class Scanner reader_.forward(length); dchar c = reader_.peek(); - mixin FastCharSearch!" \n\r\u0085\u2028\u2029"d search; - // No newline after the spaces (if any) - // (Excluding ' ' so we can use the same FastCharSearch as below) - if(!search.canFind(c) && c != ' ') + if(!c.isNSChar) { // We have spaces, but no newline. if(whitespaces.length > 0) { reader_.sliceBuilder.write(whitespaces); } @@ -1835,7 +1830,7 @@ final class Scanner const lineBreak = scanLineBreak(); allowSimpleKey_ = true; - static bool end(Reader reader_) @safe pure nothrow @nogc + static bool end(Reader reader_) @safe pure { const prefix = reader_.prefix(3); return ("---" == prefix || "..." == prefix) @@ -1849,7 +1844,7 @@ final class Scanner alias Transaction = SliceBuilder.Transaction; auto transaction = Transaction(&reader_.sliceBuilder); if(lineBreak != '\n') { reader_.sliceBuilder.write(lineBreak); } - while(search.canFind(reader_.peek())) + while(reader_.peek().isNSChar) { if(reader_.peekByte() == ' ') { reader_.forward(); } else @@ -1917,8 +1912,7 @@ final class Scanner const startLen = reader_.sliceBuilder.length; { uint length = 0; - mixin FastCharSearch!"-;/?:@&=+$,_.!~*\'()[]%"d search; - while(c.isAlphaNum || search.canFind(c)) + while(c.isAlphaNum || c.isURIChar) { if(c == '%') { @@ -1978,9 +1972,7 @@ final class Scanner } else { - const decoded = decodeUTF8NoGC!(No.validated)(bytes[], nextChar); - if(decoded.errorMessage !is null) { return size_t.max; } - c = decoded.decoded; + c = decode(bytes[], nextChar); } reader_.sliceBuilder.write(c); if(bytes.length - nextChar > 0) diff --git a/source/dyaml/serializer.d b/source/dyaml/serializer.d index ae5ec58..c69772a 100644 --- a/source/dyaml/serializer.d +++ b/source/dyaml/serializer.d @@ -85,12 +85,6 @@ struct Serializer ~this() @safe { emitter_.emit(streamEndEvent(Mark(), Mark())); - YAMLVersion_.destroy(); - YAMLVersion_ = null; - serializedNodes_.destroy(); - serializedNodes_ = null; - anchors_.destroy(); - anchors_ = null; } ///Serialize a node, emitting it in the process. diff --git a/source/dyaml/unused.d b/source/dyaml/unused.d deleted file mode 100644 index 73864f7..0000000 --- a/source/dyaml/unused.d +++ /dev/null @@ -1,161 +0,0 @@ -// Copyright Ferdinand Majerech 2014. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - - -// Code that is currently unused but may be useful for future D:YAML releases -module dyaml.unused; - - - -import std.utf; - -import tinyendian; - -// Decode an UTF-8/16/32 buffer to UTF-32 (for UTF-32 this does nothing). -// -// Params: -// -// input = The UTF-8/16/32 buffer to decode. -// encoding = Encoding of input. -// -// Returns: -// -// A struct with the following members: -// -// $(D string errorMessage) In case of a decoding error, the error message is stored -// here. If there was no error, errorMessage is NULL. Always -// check this first before using the other members. -// $(D dchar[] decoded) A GC-allocated buffer with decoded UTF-32 characters. -auto decodeUTF(ubyte[] input, UTFEncoding encoding) @safe pure nothrow -{ - // Documented in function ddoc. - struct Result - { - string errorMessage; - dchar[] decoded; - } - - Result result; - - // Decode input_ if it's encoded as UTF-8 or UTF-16. - // - // Params: - // - // buffer = The input buffer to decode. - // result = A Result struct to put decoded result and any error messages to. - // - // On error, result.errorMessage will be set. - static void decode(C)(C[] input, ref Result result) - { - // End of part of input that contains complete characters that can be decoded. - const size_t end = endOfLastUTFSequence(input); - // If end is 0, there are no full chars. - // This can happen at the end of file if there is an incomplete UTF sequence. - if(end < input.length) - { - result.errorMessage = "Invalid UTF character at the end of input"; - return; - } - - const srclength = input.length; - try for(size_t srcpos = 0; srcpos < srclength;) - { - const c = input[srcpos]; - if(c < 0x80) - { - result.decoded ~= c; - ++srcpos; - } - else - { - result.decoded ~= std.utf.decode(input, srcpos); - } - } - catch(UTFException e) - { - result.errorMessage = e.msg; - return; - } - catch(Exception e) - { - assert(false, "Unexpected exception in decode(): " ~ e.msg); - } - } - - final switch(encoding) - { - case UTFEncoding.UTF_8: decode(cast(char[])input, result); break; - case UTFEncoding.UTF_16: - assert(input.length % 2 == 0, "UTF-16 buffer size must be even"); - decode(cast(wchar[])input, result); - break; - case UTFEncoding.UTF_32: - assert(input.length % 4 == 0, - "UTF-32 buffer size must be a multiple of 4"); - // No need to decode anything - result.decoded = cast(dchar[])input; - break; - } - - if(result.errorMessage !is null) { return result; } - - return result; -} - - -// Determine the end of last UTF-8 or UTF-16 sequence in a raw buffer. -size_t endOfLastUTFSequence(C)(const C[] buffer) -{ - static if(is(C == char)) - { - for(long end = buffer.length - 1; end >= 0; --end) - { - const stride = utf8Stride[buffer[cast(size_t)end]]; - if(stride != 0xFF) - { - // If stride goes beyond end of the buffer, return end. - // Otherwise the last sequence ends at buffer.length, so we can - // return that. (Unless there is an invalid code unit, which is - // caught at decoding) - return (stride > buffer.length - end) ? cast(size_t)end : buffer.length; - } - } - return 0; - } - else static if(is(C == wchar)) - { - // TODO this is O(N), which is slow. Find out if we can somehow go - // from the end backwards with UTF-16. - size_t end = 0; - while(end < buffer.length) - { - const s = stride(buffer, end); - if(s + end > buffer.length) { break; } - end += s; - } - return end; - } -} - -// UTF-8 codepoint strides (0xFF are codepoints that can't start a sequence). -immutable ubyte[256] utf8Stride = -[ - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, - 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, - 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, - 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, - 4,4,4,4,4,4,4,4,5,5,5,5,6,6,0xFF,0xFF, -];