Merge pull request #115 from Herringway/remove-redundancy
Remove redundant functions merged-on-behalf-of: BBasile <BBasile@users.noreply.github.com>
This commit is contained in:
commit
a20d7143e7
|
@ -18,13 +18,11 @@ dyaml_src = [
|
||||||
'source/dyaml/escapes.d',
|
'source/dyaml/escapes.d',
|
||||||
'source/dyaml/event.d',
|
'source/dyaml/event.d',
|
||||||
'source/dyaml/exception.d',
|
'source/dyaml/exception.d',
|
||||||
'source/dyaml/fastcharsearch.d',
|
|
||||||
'source/dyaml/flags.d',
|
'source/dyaml/flags.d',
|
||||||
'source/dyaml/hacks.d',
|
'source/dyaml/hacks.d',
|
||||||
'source/dyaml/linebreak.d',
|
'source/dyaml/linebreak.d',
|
||||||
'source/dyaml/loader.d',
|
'source/dyaml/loader.d',
|
||||||
'source/dyaml/node.d',
|
'source/dyaml/node.d',
|
||||||
'source/dyaml/nogcutil.d',
|
|
||||||
'source/dyaml/package.d',
|
'source/dyaml/package.d',
|
||||||
'source/dyaml/parser.d',
|
'source/dyaml/parser.d',
|
||||||
'source/dyaml/queue.d',
|
'source/dyaml/queue.d',
|
||||||
|
@ -46,8 +44,7 @@ dyaml_src = [
|
||||||
'source/dyaml/test/representer.d',
|
'source/dyaml/test/representer.d',
|
||||||
'source/dyaml/test/resolver.d',
|
'source/dyaml/test/resolver.d',
|
||||||
'source/dyaml/test/tokens.d',
|
'source/dyaml/test/tokens.d',
|
||||||
'source/dyaml/token.d',
|
'source/dyaml/token.d'
|
||||||
'source/dyaml/unused.d'
|
|
||||||
]
|
]
|
||||||
install_subdir('source/dyaml', install_dir: 'include/d/yaml/')
|
install_subdir('source/dyaml', install_dir: 'include/d/yaml/')
|
||||||
|
|
||||||
|
|
|
@ -100,17 +100,6 @@ final class Constructor
|
||||||
addConstructorScalar("tag:yaml.org,2002:merge", &constructMerge);
|
addConstructorScalar("tag:yaml.org,2002:merge", &constructMerge);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Destroy the constructor.
|
|
||||||
@nogc pure @safe nothrow ~this()
|
|
||||||
{
|
|
||||||
fromScalar_.destroy();
|
|
||||||
fromScalar_ = null;
|
|
||||||
fromSequence_.destroy();
|
|
||||||
fromSequence_ = null;
|
|
||||||
fromMapping_.destroy();
|
|
||||||
fromMapping_ = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Add a constructor function from scalar.
|
/** Add a constructor function from scalar.
|
||||||
*
|
*
|
||||||
* The function must take a reference to $(D Node) to construct from.
|
* The function must take a reference to $(D Node) to construct from.
|
||||||
|
|
|
@ -29,7 +29,6 @@ import dyaml.encoding;
|
||||||
import dyaml.escapes;
|
import dyaml.escapes;
|
||||||
import dyaml.event;
|
import dyaml.event;
|
||||||
import dyaml.exception;
|
import dyaml.exception;
|
||||||
import dyaml.fastcharsearch;
|
|
||||||
import dyaml.flags;
|
import dyaml.flags;
|
||||||
import dyaml.linebreak;
|
import dyaml.linebreak;
|
||||||
import dyaml.queue;
|
import dyaml.queue;
|
||||||
|
@ -61,11 +60,13 @@ struct ScalarAnalysis
|
||||||
"allowSingleQuoted", "allowDoubleQuoted", "allowBlock", "isNull") flags;
|
"allowSingleQuoted", "allowDoubleQuoted", "allowBlock", "isNull") flags;
|
||||||
}
|
}
|
||||||
|
|
||||||
///Quickly determines if a character is a newline.
|
private alias isNewLine = among!('\n', '\u0085', '\u2028', '\u2029');
|
||||||
private mixin FastCharSearch!"\n\u0085\u2028\u2029"d newlineSearch_;
|
|
||||||
|
|
||||||
// override the canFind added by the FastCharSearch mixins
|
private alias isSpecialChar = among!('#', ',', '[', ']', '{', '}', '&', '*', '!', '|', '>', '\\', '\'', '"', '%', '@', '`');
|
||||||
private alias canFind = std.algorithm.canFind;
|
|
||||||
|
private alias isFlowIndicator = among!(',', '?', '[', ']', '{', '}');
|
||||||
|
|
||||||
|
private alias isSpace = among!('\0', '\n', '\r', '\u0085', '\u2028', '\u2029', ' ', '\t');
|
||||||
|
|
||||||
//Emits YAML events into a file/stream.
|
//Emits YAML events into a file/stream.
|
||||||
struct Emitter
|
struct Emitter
|
||||||
|
@ -1036,14 +1037,11 @@ struct Emitter
|
||||||
|
|
||||||
foreach(const size_t index, const dchar c; scalar)
|
foreach(const size_t index, const dchar c; scalar)
|
||||||
{
|
{
|
||||||
mixin FastCharSearch!("#,[]{}&*!|>\'\"%@`"d, 128) specialCharSearch;
|
|
||||||
mixin FastCharSearch!(",?[]{}"d, 128) flowIndicatorSearch;
|
|
||||||
|
|
||||||
//Check for indicators.
|
//Check for indicators.
|
||||||
if(index == 0)
|
if(index == 0)
|
||||||
{
|
{
|
||||||
//Leading indicators are special characters.
|
//Leading indicators are special characters.
|
||||||
if(specialCharSearch.canFind(c))
|
if(c.isSpecialChar)
|
||||||
{
|
{
|
||||||
flowIndicators = blockIndicators = true;
|
flowIndicators = blockIndicators = true;
|
||||||
}
|
}
|
||||||
|
@ -1060,7 +1058,7 @@ struct Emitter
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
//Some indicators cannot appear within a scalar as well.
|
//Some indicators cannot appear within a scalar as well.
|
||||||
if(flowIndicatorSearch.canFind(c)){flowIndicators = true;}
|
if(c.isFlowIndicator){flowIndicators = true;}
|
||||||
if(c == ':')
|
if(c == ':')
|
||||||
{
|
{
|
||||||
flowIndicators = true;
|
flowIndicators = true;
|
||||||
|
@ -1073,7 +1071,7 @@ struct Emitter
|
||||||
}
|
}
|
||||||
|
|
||||||
//Check for line breaks, special, and unicode characters.
|
//Check for line breaks, special, and unicode characters.
|
||||||
if(newlineSearch_.canFind(c)){lineBreaks = true;}
|
if(c.isNewLine){lineBreaks = true;}
|
||||||
if(!(c == '\n' || (c >= '\x20' && c <= '\x7E')) &&
|
if(!(c == '\n' || (c >= '\x20' && c <= '\x7E')) &&
|
||||||
!((c == '\u0085' || (c >= '\xA0' && c <= '\uD7FF') ||
|
!((c == '\u0085' || (c >= '\xA0' && c <= '\uD7FF') ||
|
||||||
(c >= '\uE000' && c <= '\uFFFD')) && c != '\uFEFF'))
|
(c >= '\uE000' && c <= '\uFFFD')) && c != '\uFEFF'))
|
||||||
|
@ -1090,7 +1088,7 @@ struct Emitter
|
||||||
previousSpace = true;
|
previousSpace = true;
|
||||||
previousBreak = false;
|
previousBreak = false;
|
||||||
}
|
}
|
||||||
else if(newlineSearch_.canFind(c))
|
else if(c.isNewLine)
|
||||||
{
|
{
|
||||||
if(index == 0){leadingBreak = true;}
|
if(index == 0){leadingBreak = true;}
|
||||||
if(index == scalar.length - 1){trailingBreak = true;}
|
if(index == scalar.length - 1){trailingBreak = true;}
|
||||||
|
@ -1103,11 +1101,10 @@ struct Emitter
|
||||||
previousSpace = previousBreak = false;
|
previousSpace = previousBreak = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
mixin FastCharSearch! "\0\n\r\u0085\u2028\u2029 \t"d spaceSearch;
|
|
||||||
//Prepare for the next character.
|
//Prepare for the next character.
|
||||||
preceededByWhitespace = spaceSearch.canFind(c);
|
preceededByWhitespace = c.isSpace != 0;
|
||||||
followedByWhitespace = index + 2 >= scalar.length ||
|
followedByWhitespace = index + 2 >= scalar.length ||
|
||||||
spaceSearch.canFind(scalar[index + 2]);
|
scalar[index + 2].isSpace;
|
||||||
}
|
}
|
||||||
|
|
||||||
with(analysis.flags)
|
with(analysis.flags)
|
||||||
|
@ -1347,14 +1344,14 @@ struct ScalarWriter
|
||||||
}
|
}
|
||||||
else if(breaks_)
|
else if(breaks_)
|
||||||
{
|
{
|
||||||
if(!newlineSearch_.canFind(c))
|
if(!c.isNewLine)
|
||||||
{
|
{
|
||||||
writeStartLineBreak();
|
writeStartLineBreak();
|
||||||
writeLineBreaks();
|
writeLineBreaks();
|
||||||
emitter_.writeIndent();
|
emitter_.writeIndent();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if((c == dcharNone || c == '\'' || c == ' ' || newlineSearch_.canFind(c))
|
else if((c == dcharNone || c == '\'' || c == ' ' || c.isNewLine)
|
||||||
&& startChar_ < endChar_)
|
&& startChar_ < endChar_)
|
||||||
{
|
{
|
||||||
writeCurrentRange(Flag!"UpdateColumn".yes);
|
writeCurrentRange(Flag!"UpdateColumn".yes);
|
||||||
|
@ -1448,7 +1445,7 @@ struct ScalarWriter
|
||||||
const dchar c = nextChar();
|
const dchar c = nextChar();
|
||||||
if(breaks_)
|
if(breaks_)
|
||||||
{
|
{
|
||||||
if(!newlineSearch_.canFind(c))
|
if(!c.isNewLine)
|
||||||
{
|
{
|
||||||
if(!leadingSpace && c != dcharNone && c != ' ')
|
if(!leadingSpace && c != dcharNone && c != ' ')
|
||||||
{
|
{
|
||||||
|
@ -1471,7 +1468,7 @@ struct ScalarWriter
|
||||||
writeCurrentRange(Flag!"UpdateColumn".yes);
|
writeCurrentRange(Flag!"UpdateColumn".yes);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if(c == dcharNone || newlineSearch_.canFind(c) || c == ' ')
|
else if(c == dcharNone || c.isNewLine || c == ' ')
|
||||||
{
|
{
|
||||||
writeCurrentRange(Flag!"UpdateColumn".yes);
|
writeCurrentRange(Flag!"UpdateColumn".yes);
|
||||||
if(c == dcharNone){emitter_.writeLineBreak();}
|
if(c == dcharNone){emitter_.writeLineBreak();}
|
||||||
|
@ -1492,13 +1489,13 @@ struct ScalarWriter
|
||||||
const dchar c = nextChar();
|
const dchar c = nextChar();
|
||||||
if(breaks_)
|
if(breaks_)
|
||||||
{
|
{
|
||||||
if(!newlineSearch_.canFind(c))
|
if(!c.isNewLine)
|
||||||
{
|
{
|
||||||
writeLineBreaks();
|
writeLineBreaks();
|
||||||
if(c != dcharNone){emitter_.writeIndent();}
|
if(c != dcharNone){emitter_.writeIndent();}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if(c == dcharNone || newlineSearch_.canFind(c))
|
else if(c == dcharNone || c.isNewLine)
|
||||||
{
|
{
|
||||||
writeCurrentRange(Flag!"UpdateColumn".no);
|
writeCurrentRange(Flag!"UpdateColumn".no);
|
||||||
if(c == dcharNone){emitter_.writeLineBreak();}
|
if(c == dcharNone){emitter_.writeLineBreak();}
|
||||||
|
@ -1538,14 +1535,14 @@ struct ScalarWriter
|
||||||
}
|
}
|
||||||
else if(breaks_)
|
else if(breaks_)
|
||||||
{
|
{
|
||||||
if(!newlineSearch_.canFind(c))
|
if(!c.isNewLine)
|
||||||
{
|
{
|
||||||
writeStartLineBreak();
|
writeStartLineBreak();
|
||||||
writeLineBreaks();
|
writeLineBreaks();
|
||||||
writeIndent(Flag!"ResetSpace".yes);
|
writeIndent(Flag!"ResetSpace".yes);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if(c == dcharNone || newlineSearch_.canFind(c) || c == ' ')
|
else if(c == dcharNone || c.isNewLine || c == ' ')
|
||||||
{
|
{
|
||||||
writeCurrentRange(Flag!"UpdateColumn".yes);
|
writeCurrentRange(Flag!"UpdateColumn".yes);
|
||||||
}
|
}
|
||||||
|
@ -1600,15 +1597,15 @@ struct ScalarWriter
|
||||||
const last = lastChar(text_, end);
|
const last = lastChar(text_, end);
|
||||||
const secondLast = end > 0 ? lastChar(text_, end) : 0;
|
const secondLast = end > 0 ? lastChar(text_, end) : 0;
|
||||||
|
|
||||||
if(newlineSearch_.canFind(text_[0]) || text_[0] == ' ')
|
if(text_[0].isNewLine || text_[0] == ' ')
|
||||||
{
|
{
|
||||||
hints[hintsIdx++] = cast(char)('0' + bestIndent);
|
hints[hintsIdx++] = cast(char)('0' + bestIndent);
|
||||||
}
|
}
|
||||||
if(!newlineSearch_.canFind(last))
|
if(!last.isNewLine)
|
||||||
{
|
{
|
||||||
hints[hintsIdx++] = '-';
|
hints[hintsIdx++] = '-';
|
||||||
}
|
}
|
||||||
else if(std.utf.count(text_) == 1 || newlineSearch_.canFind(secondLast))
|
else if(std.utf.count(text_) == 1 || secondLast.isNewLine)
|
||||||
{
|
{
|
||||||
hints[hintsIdx++] = '+';
|
hints[hintsIdx++] = '+';
|
||||||
}
|
}
|
||||||
|
@ -1680,7 +1677,7 @@ struct ScalarWriter
|
||||||
void updateBreaks(in dchar c, const Flag!"UpdateSpaces" updateSpaces) pure @safe
|
void updateBreaks(in dchar c, const Flag!"UpdateSpaces" updateSpaces) pure @safe
|
||||||
{
|
{
|
||||||
if(c == dcharNone){return;}
|
if(c == dcharNone){return;}
|
||||||
breaks_ = newlineSearch_.canFind(c);
|
breaks_ = (c.isNewLine != 0);
|
||||||
if(updateSpaces){spaces_ = c == ' ';}
|
if(updateSpaces){spaces_ = c == ' ';}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,95 +0,0 @@
|
||||||
|
|
||||||
// Copyright Ferdinand Majerech 2011.
|
|
||||||
// Distributed under the Boost Software License, Version 1.0.
|
|
||||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
|
||||||
// http://www.boost.org/LICENSE_1_0.txt)
|
|
||||||
|
|
||||||
module dyaml.fastcharsearch;
|
|
||||||
|
|
||||||
|
|
||||||
import std.algorithm;
|
|
||||||
import std.conv;
|
|
||||||
|
|
||||||
|
|
||||||
package:
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Mixin used for fast searching for a character in string.
|
|
||||||
*
|
|
||||||
* Creates a lookup table to quickly determine if a character
|
|
||||||
* is present in the string. Size of the lookup table is limited;
|
|
||||||
* any characters not represented in the table will be checked
|
|
||||||
* by ordinary equality comparison.
|
|
||||||
*
|
|
||||||
* Params: chars = String to search in.
|
|
||||||
* tableSize = Maximum number of bytes used by the table.
|
|
||||||
*
|
|
||||||
* Generated method:
|
|
||||||
* bool canFind(dchar c)
|
|
||||||
*
|
|
||||||
* Determines if a character is in the string.
|
|
||||||
*/
|
|
||||||
template FastCharSearch(dstring chars, uint tableSize = 256)
|
|
||||||
{
|
|
||||||
private mixin(searchCode!(chars, tableSize)());
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Generate the search table and the canFind method.
|
|
||||||
string searchCode(dstring chars, uint tableSize)()
|
|
||||||
{
|
|
||||||
import std.string;
|
|
||||||
|
|
||||||
const tableSizeStr = tableSize.to!string;
|
|
||||||
ubyte[tableSize] table;
|
|
||||||
table[] = 0;
|
|
||||||
|
|
||||||
//Characters that don't fit in the table.
|
|
||||||
dchar[] specialChars;
|
|
||||||
|
|
||||||
foreach(c; chars)
|
|
||||||
{
|
|
||||||
if(c < tableSize) { table[c] = 1; }
|
|
||||||
else { specialChars ~= c; }
|
|
||||||
}
|
|
||||||
|
|
||||||
string specialCharsCode()
|
|
||||||
{
|
|
||||||
return specialChars.map!(c => q{cast(uint)c == %s}.format(cast(uint)c)).join(q{ || });
|
|
||||||
}
|
|
||||||
|
|
||||||
const caseInTable =
|
|
||||||
q{
|
|
||||||
if(c < %s)
|
|
||||||
{
|
|
||||||
return cast(immutable(bool))table_[c];
|
|
||||||
}
|
|
||||||
}.format(tableSize);
|
|
||||||
|
|
||||||
string code;
|
|
||||||
if(tableSize)
|
|
||||||
{
|
|
||||||
code ~=
|
|
||||||
q{
|
|
||||||
static immutable ubyte[%s] table_ = [
|
|
||||||
%s];
|
|
||||||
}.format(tableSize, table[].map!(c => c ? q{true} : q{false}).join(q{, }));
|
|
||||||
}
|
|
||||||
code ~=
|
|
||||||
q{
|
|
||||||
bool canFind(const dchar c) @safe pure nothrow @nogc
|
|
||||||
{
|
|
||||||
%s
|
|
||||||
|
|
||||||
return %s;
|
|
||||||
}
|
|
||||||
}.format(tableSize ? caseInTable : "",
|
|
||||||
specialChars.length ? specialCharsCode() : q{false});
|
|
||||||
|
|
||||||
return code;
|
|
||||||
}
|
|
||||||
|
|
||||||
@safe unittest
|
|
||||||
{
|
|
||||||
mixin FastCharSearch!("+", 128) search;
|
|
||||||
assert(search.canFind('+'));
|
|
||||||
}
|
|
|
@ -126,13 +126,6 @@ struct Loader
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Destroy the Loader.
|
|
||||||
@trusted ~this()
|
|
||||||
{
|
|
||||||
reader_.destroy();
|
|
||||||
scanner_.destroy();
|
|
||||||
parser_.destroy();
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Set stream _name. Used in debugging messages.
|
/// Set stream _name. Used in debugging messages.
|
||||||
void name(string name) pure @safe nothrow @nogc
|
void name(string name) pure @safe nothrow @nogc
|
||||||
|
|
|
@ -1,451 +0,0 @@
|
||||||
// Copyright Ferdinand Majerech 2014, Digital Mars 2000-2012, Andrei Alexandrescu 2008- and Jonathan M Davis 2011-.
|
|
||||||
// Distributed under the Boost Software License, Version 1.0.
|
|
||||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
|
||||||
// http://www.boost.org/LICENSE_1_0.txt)
|
|
||||||
|
|
||||||
|
|
||||||
/// @nogc versions of or alternatives to Phobos functions that are not yet @nogc and
|
|
||||||
/// wrappers to simplify their use.
|
|
||||||
module dyaml.nogcutil;
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
import std.traits;
|
|
||||||
import std.typecons;
|
|
||||||
import std.typetuple;
|
|
||||||
import std.range;
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/// A NoGC version of std.conv.parse for integer types.
|
|
||||||
///
|
|
||||||
/// Differences:
|
|
||||||
/// overflow parameter - bool set to true if there was integer overflow.
|
|
||||||
/// Asserts that at least one character was parsed instead of throwing an exception.
|
|
||||||
/// The caller must validate the inputs before calling parseNoGC.
|
|
||||||
Target parseNoGC(Target, Source)(ref Source s, uint radix, out bool overflow)
|
|
||||||
@safe pure nothrow @nogc
|
|
||||||
if (isSomeChar!(ElementType!Source) &&
|
|
||||||
isIntegral!Target && !is(Target == enum))
|
|
||||||
in { assert(radix >= 2 && radix <= 36); }
|
|
||||||
body
|
|
||||||
{
|
|
||||||
immutable uint beyond = (radix < 10 ? '0' : 'a'-10) + radix;
|
|
||||||
|
|
||||||
Target v = 0;
|
|
||||||
size_t atStart = true;
|
|
||||||
|
|
||||||
// We can safely foreach over individual code points.
|
|
||||||
// Even with UTF-8 any digit is ASCII and anything not ASCII (such as the start of
|
|
||||||
// a UTF-8 sequence) is not a digit.
|
|
||||||
foreach(i; 0 .. s.length)
|
|
||||||
{
|
|
||||||
dchar c = s[i];
|
|
||||||
// We can just take a char instead of decoding because anything non-ASCII is not
|
|
||||||
// going to be a decodable digit, i.e. we will end at such a byte.
|
|
||||||
if (c < '0' || c >= 0x80)
|
|
||||||
break;
|
|
||||||
if (radix < 10)
|
|
||||||
{
|
|
||||||
if (c >= beyond)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (c > '9')
|
|
||||||
{
|
|
||||||
c |= 0x20;//poorman's tolower
|
|
||||||
if (c < 'a' || c >= beyond) { break; }
|
|
||||||
c -= 'a'-10-'0';
|
|
||||||
}
|
|
||||||
}
|
|
||||||
auto blah = cast(Target) (v * radix + c - '0');
|
|
||||||
if (blah < v)
|
|
||||||
{
|
|
||||||
overflow = true;
|
|
||||||
return Target.max;
|
|
||||||
}
|
|
||||||
v = blah;
|
|
||||||
atStart = false;
|
|
||||||
}
|
|
||||||
assert(!atStart, "Nothing to parse in parse()");
|
|
||||||
return v;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/// Builds a message to a buffer similarly to writef/writefln, but without
|
|
||||||
/// using GC.
|
|
||||||
///
|
|
||||||
/// C snprintf would be better, but it isn't pure.
|
|
||||||
/// formattedWrite isn't completely @nogc yet (although it isn't GC-heavy).
|
|
||||||
///
|
|
||||||
/// The user has to ensure buffer is long enough - an assert checks that we don't run
|
|
||||||
/// out of space. Currently this can only write strings and dchars.
|
|
||||||
char[] printNoGC(S...)(char[] buffer, S args) @safe pure nothrow @nogc
|
|
||||||
{
|
|
||||||
auto appender = appenderNoGC(buffer);
|
|
||||||
|
|
||||||
foreach(arg; args)
|
|
||||||
{
|
|
||||||
alias A = typeof(arg);
|
|
||||||
static if(is(A == char[]) || is(A == string)) { appender.put(arg); }
|
|
||||||
else static if(is(Unqual!A == dchar)) { appender.putDChar(arg); }
|
|
||||||
else static assert(false, "printNoGC does not support " ~ A.stringof);
|
|
||||||
}
|
|
||||||
|
|
||||||
return appender.data;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/// A UFCS utility function to write a dchar to an AppenderNoGCFixed using writeDCharTo.
|
|
||||||
///
|
|
||||||
/// The char $(B must) be a valid dchar.
|
|
||||||
void putDChar(ref AppenderNoGCFixed!(char[], char) appender, dchar c)
|
|
||||||
@safe pure nothrow @nogc
|
|
||||||
{
|
|
||||||
char[4] dcharBuf;
|
|
||||||
if(c < 0x80)
|
|
||||||
{
|
|
||||||
dcharBuf[0] = cast(char)c;
|
|
||||||
appender.put(dcharBuf[0 .. 1]);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
// Should be safe to use as the first thing Reader does is validate everything.
|
|
||||||
const bytes = encodeValidCharNoGC(dcharBuf, c);
|
|
||||||
appender.put(dcharBuf[0 .. bytes]);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Convenience function that returns an $(D AppenderNoGCFixed!A) using $(D array)
|
|
||||||
/// for storage.
|
|
||||||
AppenderNoGCFixed!(E[]) appenderNoGC(A : E[], E)(A array)
|
|
||||||
{
|
|
||||||
return AppenderNoGCFixed!(E[])(array);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A gutted, NoGC version of std.array.appender.
|
|
||||||
///
|
|
||||||
/// Works on a fixed-size buffer.
|
|
||||||
struct AppenderNoGCFixed(A : T[], T)
|
|
||||||
{
|
|
||||||
import std.array;
|
|
||||||
|
|
||||||
private struct Data
|
|
||||||
{
|
|
||||||
size_t capacity;
|
|
||||||
Unqual!T[] arr;
|
|
||||||
bool canExtend = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
private Data _data;
|
|
||||||
|
|
||||||
@nogc:
|
|
||||||
|
|
||||||
/// Construct an appender that will work with given buffer.
|
|
||||||
///
|
|
||||||
/// Data written to the appender will overwrite the buffer from the start.
|
|
||||||
this(T[] arr) @safe pure nothrow
|
|
||||||
{
|
|
||||||
// initialize to a given array.
|
|
||||||
_data.arr = cast(Unqual!T[])arr[0 .. 0]; //trusted
|
|
||||||
_data.capacity = arr.length;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the capacity of the array (the maximum number of elements the
|
|
||||||
* managed array can accommodate before triggering a reallocation). If any
|
|
||||||
* appending will reallocate, $(D capacity) returns $(D 0).
|
|
||||||
*/
|
|
||||||
@property size_t capacity() const @safe pure nothrow
|
|
||||||
{
|
|
||||||
return _data.capacity;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the managed array.
|
|
||||||
*/
|
|
||||||
@property inout(T)[] data() inout @safe pure nothrow
|
|
||||||
{
|
|
||||||
return cast(typeof(return))(_data.arr);
|
|
||||||
}
|
|
||||||
|
|
||||||
// ensure we can add nelems elements, resizing as necessary
|
|
||||||
private void ensureAddable(size_t nelems) @safe pure nothrow
|
|
||||||
{
|
|
||||||
assert(_data.capacity >= _data.arr.length + nelems,
|
|
||||||
"AppenderFixed ran out of space");
|
|
||||||
}
|
|
||||||
|
|
||||||
void put(U)(U[] items) if (is(Unqual!U == T))
|
|
||||||
{
|
|
||||||
// make sure we have enough space, then add the items
|
|
||||||
ensureAddable(items.length);
|
|
||||||
immutable len = _data.arr.length;
|
|
||||||
immutable newlen = len + items.length;
|
|
||||||
|
|
||||||
auto bigDataFun() @trusted nothrow { return _data.arr.ptr[0 .. newlen];}
|
|
||||||
auto bigData = bigDataFun();
|
|
||||||
|
|
||||||
alias UT = Unqual!T;
|
|
||||||
|
|
||||||
bigData[len .. newlen] = items[];
|
|
||||||
|
|
||||||
//We do this at the end, in case of exceptions
|
|
||||||
_data.arr = bigData;
|
|
||||||
}
|
|
||||||
|
|
||||||
// only allow overwriting data on non-immutable and non-const data
|
|
||||||
static if (isMutable!T)
|
|
||||||
{
|
|
||||||
/**
|
|
||||||
* Clears the managed array. This allows the elements of the array to be reused
|
|
||||||
* for appending.
|
|
||||||
*
|
|
||||||
* Note that clear is disabled for immutable or const element types, due to the
|
|
||||||
* possibility that $(D AppenderNoGCFixed) might overwrite immutable data.
|
|
||||||
*/
|
|
||||||
void clear() @safe pure nothrow
|
|
||||||
{
|
|
||||||
_data.arr = ()@trusted{ return _data.arr.ptr[0 .. 0]; }();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/// Clear is not available for const/immutable data.
|
|
||||||
@disable void clear();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@safe unittest
|
|
||||||
{
|
|
||||||
char[256] buffer;
|
|
||||||
auto appender = appenderNoGC(buffer[]);
|
|
||||||
appender.put("found unsupported escape character: ");
|
|
||||||
appender.putDChar('a');
|
|
||||||
appender.putDChar('á');
|
|
||||||
assert(appender.data == "found unsupported escape character: aá");
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/// Result of a validateUTF8NoGC call.
|
|
||||||
struct ValidateResult
|
|
||||||
{
|
|
||||||
/// Is the validated string valid?
|
|
||||||
bool valid;
|
|
||||||
/// Number of characters in the string.
|
|
||||||
///
|
|
||||||
/// If the string is not valid, this is the number of valid characters before
|
|
||||||
/// hitting the first invalid sequence.
|
|
||||||
size_t characterCount;
|
|
||||||
/// If the string is not valid, error message with details is here.
|
|
||||||
string msg;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Validate a UTF-8 string, checking if it is well-formed Unicode.
|
|
||||||
///
|
|
||||||
/// See_Also: ValidateResult
|
|
||||||
ValidateResult validateUTF8NoGC(const(char[]) str) @safe pure nothrow @nogc
|
|
||||||
{
|
|
||||||
immutable len = str.length;
|
|
||||||
size_t characterCount;
|
|
||||||
outer: for (size_t index = 0; index < len; )
|
|
||||||
{
|
|
||||||
if(str[index] < 0x80)
|
|
||||||
{
|
|
||||||
++index;
|
|
||||||
++characterCount;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
auto decoded = decodeUTF8NoGC!(No.validated)(str, index);
|
|
||||||
if(decoded.errorMessage !is null)
|
|
||||||
{
|
|
||||||
return ValidateResult(false, characterCount, decoded.errorMessage);
|
|
||||||
}
|
|
||||||
++characterCount;
|
|
||||||
}
|
|
||||||
|
|
||||||
return ValidateResult(true, characterCount);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// @nogc version of std.utf.decode() for char[].
|
|
||||||
///
|
|
||||||
/// The caller $(B must) handle ASCII (< 0x80) characters manually; this is asserted to
|
|
||||||
/// force code using this function to be efficient.
|
|
||||||
///
|
|
||||||
/// Params:
|
|
||||||
///
|
|
||||||
/// validated = If true, assume str is a valid UTF-8 string and don't generate any
|
|
||||||
/// error-checking code. If validated is true, str $(B must) be a valid
|
|
||||||
/// character, otherwise undefined behavior will occur. Also affects the
|
|
||||||
/// return type.
|
|
||||||
/// str = Will decode the first code point from this string.
|
|
||||||
/// index = Index in str where the code point starts. Will be updated to point to
|
|
||||||
/// the next code point.
|
|
||||||
///
|
|
||||||
/// Returns: If validated is true, the decoded character.
|
|
||||||
/// Otherwise a struct with a 'decoded' member - the decoded character, and a
|
|
||||||
/// 'string errorMessage' member that is null on success and otherwise stores
|
|
||||||
/// the error message.
|
|
||||||
auto decodeUTF8NoGC(Flag!"validated" validated)(const(char[]) str, ref size_t index)
|
|
||||||
{
|
|
||||||
static if(!validated) struct Result
|
|
||||||
{
|
|
||||||
dchar decoded;
|
|
||||||
string errorMessage;
|
|
||||||
}
|
|
||||||
else alias Result = dchar;
|
|
||||||
|
|
||||||
/// Dchar bitmask for different numbers of UTF-8 code units.
|
|
||||||
enum bitMask = tuple((1 << 7) - 1, (1 << 11) - 1, (1 << 16) - 1, (1 << 21) - 1);
|
|
||||||
|
|
||||||
auto pstr = str[index..$];
|
|
||||||
|
|
||||||
immutable length = str.length - index;
|
|
||||||
ubyte fst = pstr[0];
|
|
||||||
|
|
||||||
assert(fst & 0x80);
|
|
||||||
enum invalidUTFMsg = "Invalid UTF-8 sequence";
|
|
||||||
static if(!validated) { enum invalidUTF = Result(cast(dchar)int.max, invalidUTFMsg); }
|
|
||||||
|
|
||||||
// starter must have at least 2 first bits set
|
|
||||||
static if(validated)
|
|
||||||
{
|
|
||||||
assert((fst & 0b1100_0000) == 0b1100_0000, invalidUTFMsg);
|
|
||||||
}
|
|
||||||
else if((fst & 0b1100_0000) != 0b1100_0000)
|
|
||||||
{
|
|
||||||
return invalidUTF;
|
|
||||||
}
|
|
||||||
|
|
||||||
ubyte tmp = void;
|
|
||||||
dchar d = fst; // upper control bits are masked out later
|
|
||||||
fst <<= 1;
|
|
||||||
|
|
||||||
|
|
||||||
foreach (i; TypeTuple!(1, 2, 3))
|
|
||||||
{
|
|
||||||
static if(validated) { assert(i != length, "Decoding out of bounds"); }
|
|
||||||
else if(i == length) { return Result(cast(dchar)int.max, "Decoding out of bounds"); }
|
|
||||||
|
|
||||||
tmp = pstr[i];
|
|
||||||
static if(validated) { assert((tmp & 0xC0) == 0x80, invalidUTFMsg); }
|
|
||||||
else if((tmp & 0xC0) != 0x80) { return invalidUTF; }
|
|
||||||
|
|
||||||
d = (d << 6) | (tmp & 0x3F);
|
|
||||||
fst <<= 1;
|
|
||||||
|
|
||||||
if (!(fst & 0x80)) // no more bytes
|
|
||||||
{
|
|
||||||
d &= bitMask[i]; // mask out control bits
|
|
||||||
|
|
||||||
// overlong, could have been encoded with i bytes
|
|
||||||
static if(validated) { assert((d & ~bitMask[i - 1]) != 0, invalidUTFMsg); }
|
|
||||||
else if((d & ~bitMask[i - 1]) == 0) { return invalidUTF; }
|
|
||||||
|
|
||||||
// check for surrogates only needed for 3 bytes
|
|
||||||
static if (i == 2)
|
|
||||||
{
|
|
||||||
static if(validated) { assert(isValidDchar(d), invalidUTFMsg); }
|
|
||||||
else if(!isValidDchar(d)) { return invalidUTF; }
|
|
||||||
}
|
|
||||||
|
|
||||||
index += i + 1;
|
|
||||||
static if (i == 3)
|
|
||||||
{
|
|
||||||
static if(validated) { assert(d <= dchar.max, invalidUTFMsg); }
|
|
||||||
else if(d > dchar.max) { return invalidUTF; }
|
|
||||||
}
|
|
||||||
|
|
||||||
return Result(d);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static if(validated) { assert(false, invalidUTFMsg); }
|
|
||||||
else { return invalidUTF; }
|
|
||||||
}
|
|
||||||
|
|
||||||
/// ditto
|
|
||||||
alias decodeValidUTF8NoGC = decodeUTF8NoGC!(Yes.validated);
|
|
||||||
|
|
||||||
/// @nogc version of std.utf.encode() for char[].
|
|
||||||
///
|
|
||||||
/// The caller $(B must) handle ASCII (< 0x80) characters manually; this is asserted to
|
|
||||||
/// force code using this function to be efficient.
|
|
||||||
///
|
|
||||||
/// Params:
|
|
||||||
/// validated = If true, assume c is a valid, non-surrogate UTF-32 code point and don't
|
|
||||||
/// generate any error-checking code. If validated is true, c $(B must) be
|
|
||||||
/// a valid character, otherwise undefined behavior will occur. Also affects
|
|
||||||
/// the return type.
|
|
||||||
/// buf = Buffer to write the encoded result to.
|
|
||||||
/// c = Character to encode.
|
|
||||||
///
|
|
||||||
/// Returns: If validated is true, number of bytes the encoded character takes up in buf.
|
|
||||||
/// Otherwise a struct with a 'bytes' member specifying the number of bytes of
|
|
||||||
/// the encoded character, and a 'string errorMessage' member that is null
|
|
||||||
/// if there was no error and otherwise stores the error message.
|
|
||||||
auto encodeCharNoGC(Flag!"validated" validated)(ref char[4] buf, dchar c)
|
|
||||||
@safe pure nothrow @nogc
|
|
||||||
{
|
|
||||||
static if(!validated) struct Result
|
|
||||||
{
|
|
||||||
size_t bytes;
|
|
||||||
string errorMessage;
|
|
||||||
}
|
|
||||||
else alias Result = size_t;
|
|
||||||
|
|
||||||
// Force the caller to optimize ASCII (the 1-byte case)
|
|
||||||
assert(c >= 0x80, "Caller should explicitly handle ASCII chars");
|
|
||||||
if (c <= 0x7FF)
|
|
||||||
{
|
|
||||||
assert(isValidDchar(c));
|
|
||||||
buf[0] = cast(char)(0xC0 | (c >> 6));
|
|
||||||
buf[1] = cast(char)(0x80 | (c & 0x3F));
|
|
||||||
return Result(2);
|
|
||||||
}
|
|
||||||
if (c <= 0xFFFF)
|
|
||||||
{
|
|
||||||
static if(validated)
|
|
||||||
{
|
|
||||||
assert(0xD800 > c || c > 0xDFFF,
|
|
||||||
"Supposedly valid code point is a surrogate code point");
|
|
||||||
}
|
|
||||||
else if(0xD800 <= c && c <= 0xDFFF)
|
|
||||||
{
|
|
||||||
return Result(size_t.max, "Can't encode a surrogate code point in UTF-8");
|
|
||||||
}
|
|
||||||
|
|
||||||
assert(isValidDchar(c));
|
|
||||||
buf[0] = cast(char)(0xE0 | (c >> 12));
|
|
||||||
buf[1] = cast(char)(0x80 | ((c >> 6) & 0x3F));
|
|
||||||
buf[2] = cast(char)(0x80 | (c & 0x3F));
|
|
||||||
return Result(3);
|
|
||||||
}
|
|
||||||
if (c <= 0x10FFFF)
|
|
||||||
{
|
|
||||||
assert(isValidDchar(c));
|
|
||||||
buf[0] = cast(char)(0xF0 | (c >> 18));
|
|
||||||
buf[1] = cast(char)(0x80 | ((c >> 12) & 0x3F));
|
|
||||||
buf[2] = cast(char)(0x80 | ((c >> 6) & 0x3F));
|
|
||||||
buf[3] = cast(char)(0x80 | (c & 0x3F));
|
|
||||||
return Result(4);
|
|
||||||
}
|
|
||||||
|
|
||||||
assert(!isValidDchar(c));
|
|
||||||
static if(!validated)
|
|
||||||
{
|
|
||||||
return Result(size_t.max, "Can't encode an invalid code point in UTF-8");
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
assert(false, "Supposedly valid code point is invalid");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// ditto
|
|
||||||
alias encodeValidCharNoGC = encodeCharNoGC!(Yes.validated);
|
|
||||||
|
|
||||||
/// @nogc version of std.utf.isValidDchar
|
|
||||||
bool isValidDchar(dchar c) @safe pure nothrow @nogc
|
|
||||||
{
|
|
||||||
return c < 0xD800 || (c > 0xDFFF && c <= 0x10FFFF);
|
|
||||||
}
|
|
|
@ -142,16 +142,6 @@ final class Parser
|
||||||
marks_.reserve(32);
|
marks_.reserve(32);
|
||||||
}
|
}
|
||||||
|
|
||||||
///Destroy the parser.
|
|
||||||
@trusted ~this()
|
|
||||||
{
|
|
||||||
currentEvent_.destroy();
|
|
||||||
tagDirectives_.destroy();
|
|
||||||
tagDirectives_ = null;
|
|
||||||
states_.destroy();
|
|
||||||
marks_.destroy();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if the next event is one of specified types.
|
* Check if the next event is one of specified types.
|
||||||
*
|
*
|
||||||
|
@ -538,8 +528,7 @@ final class Parser
|
||||||
{
|
{
|
||||||
string notInPlace;
|
string notInPlace;
|
||||||
bool inEscape = false;
|
bool inEscape = false;
|
||||||
import dyaml.nogcutil;
|
auto appender = appender!(char[])();
|
||||||
auto appender = appenderNoGC(cast(char[])tokenValue);
|
|
||||||
for(char[] oldValue = tokenValue; !oldValue.empty();)
|
for(char[] oldValue = tokenValue; !oldValue.empty();)
|
||||||
{
|
{
|
||||||
const dchar c = oldValue.front();
|
const dchar c = oldValue.front();
|
||||||
|
@ -549,7 +538,7 @@ final class Parser
|
||||||
{
|
{
|
||||||
if(c != '\\')
|
if(c != '\\')
|
||||||
{
|
{
|
||||||
if(notInPlace is null) { appender.putDChar(c); }
|
if(notInPlace is null) { appender.put(c); }
|
||||||
else { notInPlace ~= c; }
|
else { notInPlace ~= c; }
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -570,7 +559,7 @@ final class Parser
|
||||||
// many-byte unicode chars
|
// many-byte unicode chars
|
||||||
if(c != 'L' && c != 'P')
|
if(c != 'L' && c != 'P')
|
||||||
{
|
{
|
||||||
appender.putDChar(dyaml.escapes.fromEscape(c));
|
appender.put(dyaml.escapes.fromEscape(c));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// Need to duplicate as we won't fit into
|
// Need to duplicate as we won't fit into
|
||||||
|
@ -596,10 +585,8 @@ final class Parser
|
||||||
assert(!hex.canFind!(d => !d.isHexDigit),
|
assert(!hex.canFind!(d => !d.isHexDigit),
|
||||||
"Scanner must ensure the hex string is valid");
|
"Scanner must ensure the hex string is valid");
|
||||||
|
|
||||||
bool overflow;
|
const decoded = cast(dchar)parse!int(hex, 16u);
|
||||||
const decoded = cast(dchar)parseNoGC!int(hex, 16u, overflow);
|
if(notInPlace is null) { appender.put(decoded); }
|
||||||
assert(!overflow, "Scanner must ensure there's no overflow");
|
|
||||||
if(notInPlace is null) { appender.putDChar(decoded); }
|
|
||||||
else { notInPlace ~= decoded; }
|
else { notInPlace ~= decoded; }
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
|
@ -47,9 +47,6 @@ struct Queue(T)
|
||||||
/// Cursor pointing to the current node in iteration.
|
/// Cursor pointing to the current node in iteration.
|
||||||
Node* cursor_ = null;
|
Node* cursor_ = null;
|
||||||
|
|
||||||
/// The first element of a linked list of freed Nodes available for recycling.
|
|
||||||
Node* freeList_ = null;
|
|
||||||
|
|
||||||
/// Length of the queue.
|
/// Length of the queue.
|
||||||
size_t length_ = 0;
|
size_t length_ = 0;
|
||||||
|
|
||||||
|
@ -58,20 +55,6 @@ struct Queue(T)
|
||||||
@disable bool opEquals(ref Queue);
|
@disable bool opEquals(ref Queue);
|
||||||
@disable int opCmp(ref Queue);
|
@disable int opCmp(ref Queue);
|
||||||
|
|
||||||
/// Destroy the queue, deallocating all its elements.
|
|
||||||
@trusted nothrow ~this()
|
|
||||||
{
|
|
||||||
while(!empty) { pop(); }
|
|
||||||
while(freeList_ !is null)
|
|
||||||
{
|
|
||||||
auto toFree = freeList_;
|
|
||||||
freeList_ = toFree.next_;
|
|
||||||
free(toFree);
|
|
||||||
}
|
|
||||||
cursor_ = last_ = first_ = null;
|
|
||||||
length_ = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Start iterating over the queue.
|
/// Start iterating over the queue.
|
||||||
void startIteration() @safe pure nothrow @nogc
|
void startIteration() @safe pure nothrow @nogc
|
||||||
{
|
{
|
||||||
|
@ -101,7 +84,7 @@ struct Queue(T)
|
||||||
/// Push new item to the queue.
|
/// Push new item to the queue.
|
||||||
void push(T item) @safe nothrow
|
void push(T item) @safe nothrow
|
||||||
{
|
{
|
||||||
Node* newLast = newNode(item, null);
|
Node* newLast = new Node(item, null);
|
||||||
if(last_ !is null) { last_.next_ = newLast; }
|
if(last_ !is null) { last_.next_ = newLast; }
|
||||||
if(first_ is null) { first_ = newLast; }
|
if(first_ is null) { first_ = newLast; }
|
||||||
last_ = newLast;
|
last_ = newLast;
|
||||||
|
@ -118,7 +101,7 @@ struct Queue(T)
|
||||||
{
|
{
|
||||||
if(idx == 0)
|
if(idx == 0)
|
||||||
{
|
{
|
||||||
first_ = newNode(item, first_);
|
first_ = new Node(item, first_);
|
||||||
++length_;
|
++length_;
|
||||||
}
|
}
|
||||||
// Adding before last added element, so we can just push.
|
// Adding before last added element, so we can just push.
|
||||||
|
@ -130,7 +113,7 @@ struct Queue(T)
|
||||||
foreach(i; 1 .. idx) { current = current.next_; }
|
foreach(i; 1 .. idx) { current = current.next_; }
|
||||||
|
|
||||||
// Insert a new node after current, and put current.next_ behind it.
|
// Insert a new node after current, and put current.next_ behind it.
|
||||||
current.next_ = newNode(item, current.next_);
|
current.next_ = new Node(item, current.next_);
|
||||||
++length_;
|
++length_;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -147,9 +130,6 @@ struct Queue(T)
|
||||||
Node* popped = first_;
|
Node* popped = first_;
|
||||||
first_ = first_.next_;
|
first_ = first_.next_;
|
||||||
|
|
||||||
Node* oldFree = freeList_;
|
|
||||||
freeList_ = popped;
|
|
||||||
freeList_.next_ = oldFree;
|
|
||||||
if(--length_ == 0)
|
if(--length_ == 0)
|
||||||
{
|
{
|
||||||
assert(first_ is null);
|
assert(first_ is null);
|
||||||
|
@ -181,43 +161,6 @@ struct Queue(T)
|
||||||
{
|
{
|
||||||
return length_;
|
return length_;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
|
||||||
/// Get a new (or recycled) node with specified item and next node pointer.
|
|
||||||
///
|
|
||||||
/// Tries to reuse a node from freeList_, allocates a new node if not possible.
|
|
||||||
Node* newNode(ref T item, Node* next) @trusted nothrow
|
|
||||||
{
|
|
||||||
if(freeList_ !is null)
|
|
||||||
{
|
|
||||||
auto node = freeList_;
|
|
||||||
freeList_ = freeList_.next_;
|
|
||||||
*node = Node(item, next);
|
|
||||||
return node;
|
|
||||||
}
|
|
||||||
return allocate!Node(item, next);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
private:
|
|
||||||
|
|
||||||
/// Allocate a struct, passing arguments to its constructor or default initializer.
|
|
||||||
T* allocate(T, Args...)(Args args) @system nothrow
|
|
||||||
{
|
|
||||||
T* ptr = cast(T*)malloc(T.sizeof);
|
|
||||||
*ptr = T(args);
|
|
||||||
// The struct might contain references to GC-allocated memory, so tell the GC about it.
|
|
||||||
static if(hasIndirections!T) { GC.addRange(cast(void*)ptr, T.sizeof); }
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Deallocate struct pointed at by specified pointer.
|
|
||||||
void free(T)(T* ptr) @system nothrow
|
|
||||||
{
|
|
||||||
// GC doesn't need to care about any references in this struct anymore.
|
|
||||||
static if(hasIndirections!T) { GC.removeRange(cast(void*)ptr); }
|
|
||||||
core.stdc.stdlib.free(ptr);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@safe unittest
|
@safe unittest
|
||||||
|
|
|
@ -22,12 +22,10 @@ import std.utf;
|
||||||
|
|
||||||
import tinyendian;
|
import tinyendian;
|
||||||
|
|
||||||
import dyaml.fastcharsearch;
|
|
||||||
import dyaml.encoding;
|
import dyaml.encoding;
|
||||||
import dyaml.exception;
|
import dyaml.exception;
|
||||||
import dyaml.nogcutil;
|
|
||||||
|
|
||||||
|
|
||||||
|
alias isBreak = among!('\n', '\u0085', '\u2028', '\u2029');
|
||||||
|
|
||||||
package:
|
package:
|
||||||
|
|
||||||
|
@ -133,7 +131,7 @@ final class Reader
|
||||||
///
|
///
|
||||||
// XXX removed; search for 'risky' to find why.
|
// XXX removed; search for 'risky' to find why.
|
||||||
// Throws: ReaderException if trying to read past the end of the buffer.
|
// Throws: ReaderException if trying to read past the end of the buffer.
|
||||||
dchar peek(const size_t index) @safe pure nothrow @nogc
|
dchar peek(const size_t index) @safe pure
|
||||||
{
|
{
|
||||||
if(index < upcomingASCII_) { return buffer_[bufferOffset_ + index]; }
|
if(index < upcomingASCII_) { return buffer_[bufferOffset_ + index]; }
|
||||||
if(characterCount_ <= charIndex_ + index)
|
if(characterCount_ <= charIndex_ + index)
|
||||||
|
@ -159,7 +157,7 @@ final class Reader
|
||||||
++lastDecodedBufferOffset_;
|
++lastDecodedBufferOffset_;
|
||||||
return b;
|
return b;
|
||||||
}
|
}
|
||||||
return decodeValidUTF8NoGC(buffer_, lastDecodedBufferOffset_);
|
return decode(buffer_, lastDecodedBufferOffset_);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 'Slow' path where we decode everything up to the requested character.
|
// 'Slow' path where we decode everything up to the requested character.
|
||||||
|
@ -176,7 +174,7 @@ final class Reader
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Optimized version of peek() for the case where peek index is 0.
|
/// Optimized version of peek() for the case where peek index is 0.
|
||||||
dchar peek() @safe pure nothrow @nogc
|
dchar peek() @safe pure
|
||||||
{
|
{
|
||||||
if(upcomingASCII_ > 0) { return buffer_[bufferOffset_]; }
|
if(upcomingASCII_ > 0) { return buffer_[bufferOffset_]; }
|
||||||
if(characterCount_ <= charIndex_) { return '\0'; }
|
if(characterCount_ <= charIndex_) { return '\0'; }
|
||||||
|
@ -216,7 +214,7 @@ final class Reader
|
||||||
/// slice will be shorter.
|
/// slice will be shorter.
|
||||||
///
|
///
|
||||||
/// Returns: Characters starting at current position or an empty slice if out of bounds.
|
/// Returns: Characters starting at current position or an empty slice if out of bounds.
|
||||||
char[] prefix(const size_t length) @safe pure nothrow @nogc
|
char[] prefix(const size_t length) @safe pure
|
||||||
{
|
{
|
||||||
return slice(length);
|
return slice(length);
|
||||||
}
|
}
|
||||||
|
@ -249,7 +247,7 @@ final class Reader
|
||||||
/// be shorter.
|
/// be shorter.
|
||||||
///
|
///
|
||||||
/// Returns: Slice into the internal buffer or an empty slice if out of bounds.
|
/// Returns: Slice into the internal buffer or an empty slice if out of bounds.
|
||||||
char[] slice(const size_t end) @safe pure nothrow @nogc
|
char[] slice(const size_t end) @safe pure
|
||||||
{
|
{
|
||||||
// Fast path in case the caller has already peek()ed all the way to end.
|
// Fast path in case the caller has already peek()ed all the way to end.
|
||||||
if(end == lastDecodedCharOffset_)
|
if(end == lastDecodedCharOffset_)
|
||||||
|
@ -277,7 +275,7 @@ final class Reader
|
||||||
///
|
///
|
||||||
/// Throws: ReaderException if trying to read past the end of the buffer
|
/// Throws: ReaderException if trying to read past the end of the buffer
|
||||||
/// or if invalid data is read.
|
/// or if invalid data is read.
|
||||||
dchar get() @safe pure nothrow @nogc
|
dchar get() @safe pure
|
||||||
{
|
{
|
||||||
const result = peek();
|
const result = peek();
|
||||||
forward();
|
forward();
|
||||||
|
@ -289,7 +287,7 @@ final class Reader
|
||||||
/// Params: length = Number or characters (code points, not bytes) to get.
|
/// Params: length = Number or characters (code points, not bytes) to get.
|
||||||
///
|
///
|
||||||
/// Returns: Characters starting at current position.
|
/// Returns: Characters starting at current position.
|
||||||
char[] get(const size_t length) @safe pure nothrow @nogc
|
char[] get(const size_t length) @safe pure
|
||||||
{
|
{
|
||||||
auto result = slice(length);
|
auto result = slice(length);
|
||||||
forward(length);
|
forward(length);
|
||||||
|
@ -299,10 +297,8 @@ final class Reader
|
||||||
/// Move current position forward.
|
/// Move current position forward.
|
||||||
///
|
///
|
||||||
/// Params: length = Number of characters to move position forward.
|
/// Params: length = Number of characters to move position forward.
|
||||||
void forward(size_t length) @safe pure nothrow @nogc
|
void forward(size_t length) @safe pure
|
||||||
{
|
{
|
||||||
mixin FastCharSearch!"\n\u0085\u2028\u2029"d search;
|
|
||||||
|
|
||||||
while(length > 0)
|
while(length > 0)
|
||||||
{
|
{
|
||||||
auto asciiToTake = min(upcomingASCII_, length);
|
auto asciiToTake = min(upcomingASCII_, length);
|
||||||
|
@ -336,10 +332,10 @@ final class Reader
|
||||||
"ASCII must be handled by preceding code");
|
"ASCII must be handled by preceding code");
|
||||||
|
|
||||||
++charIndex_;
|
++charIndex_;
|
||||||
const c = decodeValidUTF8NoGC(buffer_, bufferOffset_);
|
const c = decode(buffer_, bufferOffset_);
|
||||||
|
|
||||||
// New line. (can compare with '\n' without decoding since it's ASCII)
|
// New line. (can compare with '\n' without decoding since it's ASCII)
|
||||||
if(search.canFind(c) || (c == '\r' && buffer_[bufferOffset_] != '\n'))
|
if(c.isBreak || (c == '\r' && buffer_[bufferOffset_] != '\n'))
|
||||||
{
|
{
|
||||||
++line_;
|
++line_;
|
||||||
column_ = 0;
|
column_ = 0;
|
||||||
|
@ -354,7 +350,7 @@ final class Reader
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Move current position forward by one character.
|
/// Move current position forward by one character.
|
||||||
void forward() @safe pure nothrow @nogc
|
void forward() @safe pure
|
||||||
{
|
{
|
||||||
++charIndex_;
|
++charIndex_;
|
||||||
lastDecodedBufferOffset_ = bufferOffset_;
|
lastDecodedBufferOffset_ = bufferOffset_;
|
||||||
|
@ -377,16 +373,15 @@ final class Reader
|
||||||
}
|
}
|
||||||
|
|
||||||
// UTF-8
|
// UTF-8
|
||||||
mixin FastCharSearch!"\n\u0085\u2028\u2029"d search;
|
|
||||||
assert(bufferOffset_ < buffer_.length,
|
assert(bufferOffset_ < buffer_.length,
|
||||||
"Attempted to decode past the end of YAML buffer");
|
"Attempted to decode past the end of YAML buffer");
|
||||||
assert(buffer_[bufferOffset_] >= 0x80,
|
assert(buffer_[bufferOffset_] >= 0x80,
|
||||||
"ASCII must be handled by preceding code");
|
"ASCII must be handled by preceding code");
|
||||||
|
|
||||||
const c = decodeValidUTF8NoGC(buffer_, bufferOffset_);
|
const c = decode(buffer_, bufferOffset_);
|
||||||
|
|
||||||
// New line. (can compare with '\n' without decoding since it's ASCII)
|
// New line. (can compare with '\n' without decoding since it's ASCII)
|
||||||
if(search.canFind(c) || (c == '\r' && buffer_[bufferOffset_] != '\n'))
|
if(c.isBreak || (c == '\r' && buffer_[bufferOffset_] != '\n'))
|
||||||
{
|
{
|
||||||
++line_;
|
++line_;
|
||||||
column_ = 0;
|
column_ = 0;
|
||||||
|
@ -425,7 +420,7 @@ private:
|
||||||
// lastDecodedCharOffset_/lastDecodedBufferOffset_ and update them.
|
// lastDecodedCharOffset_/lastDecodedBufferOffset_ and update them.
|
||||||
//
|
//
|
||||||
// Does not advance the buffer position. Used in peek() and slice().
|
// Does not advance the buffer position. Used in peek() and slice().
|
||||||
dchar decodeNext() @safe pure nothrow @nogc
|
dchar decodeNext() @safe pure
|
||||||
{
|
{
|
||||||
assert(lastDecodedBufferOffset_ < buffer_.length,
|
assert(lastDecodedBufferOffset_ < buffer_.length,
|
||||||
"Attempted to decode past the end of YAML buffer");
|
"Attempted to decode past the end of YAML buffer");
|
||||||
|
@ -438,7 +433,7 @@ private:
|
||||||
return b;
|
return b;
|
||||||
}
|
}
|
||||||
|
|
||||||
return decodeValidUTF8NoGC(buffer_, lastDecodedBufferOffset_);
|
return decode(buffer_, lastDecodedBufferOffset_);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -558,7 +553,7 @@ public:
|
||||||
/// Data can only be written up to the current position in the Reader buffer.
|
/// Data can only be written up to the current position in the Reader buffer.
|
||||||
///
|
///
|
||||||
/// See_Also: begin
|
/// See_Also: begin
|
||||||
void write(dchar c) @safe pure nothrow @nogc
|
void write(dchar c) @safe pure
|
||||||
{
|
{
|
||||||
assert(inProgress, "write called without begin");
|
assert(inProgress, "write called without begin");
|
||||||
if(c < 0x80)
|
if(c < 0x80)
|
||||||
|
@ -569,7 +564,7 @@ public:
|
||||||
|
|
||||||
// We need to encode a non-ASCII dchar into UTF-8
|
// We need to encode a non-ASCII dchar into UTF-8
|
||||||
char[4] encodeBuf;
|
char[4] encodeBuf;
|
||||||
const bytes = encodeValidCharNoGC(encodeBuf, c);
|
const bytes = encode(encodeBuf, c);
|
||||||
reader_.buffer_[end_ .. end_ + bytes] = encodeBuf[0 .. bytes];
|
reader_.buffer_[end_ .. end_ + bytes] = encodeBuf[0 .. bytes];
|
||||||
end_ += bytes;
|
end_ += bytes;
|
||||||
}
|
}
|
||||||
|
@ -585,7 +580,7 @@ public:
|
||||||
/// position = Position to insert the character at in code units, not code points.
|
/// position = Position to insert the character at in code units, not code points.
|
||||||
/// Must be less than slice length(); a previously returned length()
|
/// Must be less than slice length(); a previously returned length()
|
||||||
/// can be used.
|
/// can be used.
|
||||||
void insert(const dchar c, const size_t position) @safe pure nothrow @nogc
|
void insert(const dchar c, const size_t position) @safe pure
|
||||||
{
|
{
|
||||||
assert(inProgress, "insert called without begin");
|
assert(inProgress, "insert called without begin");
|
||||||
assert(start_ + position <= end_, "Trying to insert after the end of the slice");
|
assert(start_ + position <= end_, "Trying to insert after the end of the slice");
|
||||||
|
@ -596,7 +591,7 @@ public:
|
||||||
// Encode c into UTF-8
|
// Encode c into UTF-8
|
||||||
char[4] encodeBuf;
|
char[4] encodeBuf;
|
||||||
if(c < 0x80) { encodeBuf[0] = cast(char)c; }
|
if(c < 0x80) { encodeBuf[0] = cast(char)c; }
|
||||||
const size_t bytes = c < 0x80 ? 1 : encodeValidCharNoGC(encodeBuf, c);
|
const size_t bytes = c < 0x80 ? 1 : encode(encodeBuf, c);
|
||||||
|
|
||||||
if(movedLength > 0)
|
if(movedLength > 0)
|
||||||
{
|
{
|
||||||
|
@ -660,16 +655,14 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Destroy the transaction and revert it if it hasn't been committed yet.
|
/// Destroy the transaction and revert it if it hasn't been committed yet.
|
||||||
///
|
void end() @safe pure nothrow @nogc
|
||||||
/// Does nothing for a default-initialized transaction.
|
|
||||||
~this() @safe pure nothrow @nogc
|
|
||||||
{
|
{
|
||||||
if(builder_ is null || committed_) { return; }
|
assert(builder_ && builder_.endStackUsed_ == stackLevel_ + 1,
|
||||||
assert(builder_.endStackUsed_ == stackLevel_ + 1,
|
|
||||||
"Parent transactions don't fully contain child transactions");
|
"Parent transactions don't fully contain child transactions");
|
||||||
builder_.pop();
|
builder_.pop();
|
||||||
builder_ = null;
|
builder_ = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -765,13 +758,8 @@ auto toUTF8(ubyte[] input, const UTFEncoding encoding) @safe pure nothrow
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
const encodeResult = encodeCharNoGC!(No.validated)(encodeBuf, c);
|
std.utf.encode(encodeBuf, c);
|
||||||
if(encodeResult.errorMessage !is null)
|
const bytes = codeLength!char(c);
|
||||||
{
|
|
||||||
result.errorMessage = encodeResult.errorMessage;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
const bytes = encodeResult.bytes;
|
|
||||||
utf8[length .. length + bytes] = encodeBuf[0 .. bytes];
|
utf8[length .. length + bytes] = encodeBuf[0 .. bytes];
|
||||||
length += bytes;
|
length += bytes;
|
||||||
}
|
}
|
||||||
|
@ -789,14 +777,8 @@ auto toUTF8(ubyte[] input, const UTFEncoding encoding) @safe pure nothrow
|
||||||
{
|
{
|
||||||
case UTFEncoding.UTF_8:
|
case UTFEncoding.UTF_8:
|
||||||
result.utf8 = cast(char[])input;
|
result.utf8 = cast(char[])input;
|
||||||
const validateResult = result.utf8.validateUTF8NoGC();
|
result.utf8.validate();
|
||||||
if(!validateResult.valid)
|
result.characterCount = std.utf.count(result.utf8);
|
||||||
{
|
|
||||||
result.errorMessage = "UTF-8 validation error after character #" ~
|
|
||||||
validateResult.characterCount.to!string ~ ": " ~
|
|
||||||
validateResult.msg;
|
|
||||||
}
|
|
||||||
result.characterCount = validateResult.characterCount;
|
|
||||||
break;
|
break;
|
||||||
case UTFEncoding.UTF_16:
|
case UTFEncoding.UTF_16:
|
||||||
assert(input.length % 2 == 0, "UTF-16 buffer size must be even");
|
assert(input.length % 2 == 0, "UTF-16 buffer size must be even");
|
||||||
|
@ -818,7 +800,7 @@ auto toUTF8(ubyte[] input, const UTFEncoding encoding) @safe pure nothrow
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Determine if all characters (code points, not bytes) in a string are printable.
|
/// Determine if all characters (code points, not bytes) in a string are printable.
|
||||||
bool isPrintableValidUTF8(const char[] chars) @safe pure nothrow @nogc
|
bool isPrintableValidUTF8(const char[] chars) @safe pure
|
||||||
{
|
{
|
||||||
// This is oversized (only 128 entries are necessary) simply because having 256
|
// This is oversized (only 128 entries are necessary) simply because having 256
|
||||||
// entries improves performance... for some reason (alignment?)
|
// entries improves performance... for some reason (alignment?)
|
||||||
|
@ -918,7 +900,7 @@ bool isPrintableValidUTF8(const char[] chars) @safe pure nothrow @nogc
|
||||||
if(index == chars.length) { break; }
|
if(index == chars.length) { break; }
|
||||||
|
|
||||||
// Not ASCII, need to decode.
|
// Not ASCII, need to decode.
|
||||||
const dchar c = decodeValidUTF8NoGC(chars, index);
|
const dchar c = decode(chars, index);
|
||||||
// We now c is not ASCII, so only check for printable non-ASCII chars.
|
// We now c is not ASCII, so only check for printable non-ASCII chars.
|
||||||
if(!(c == 0x85 || (c >= 0xA0 && c <= '\uD7FF') ||
|
if(!(c == 0x85 || (c >= 0xA0 && c <= '\uD7FF') ||
|
||||||
(c >= '\uE000' && c <= '\uFFFD') ||
|
(c >= '\uE000' && c <= '\uFFFD') ||
|
||||||
|
|
|
@ -81,13 +81,6 @@ final class Representer
|
||||||
addRepresenter!SysTime(&representSysTime);
|
addRepresenter!SysTime(&representSysTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
///Destroy the Representer.
|
|
||||||
~this() pure @safe nothrow
|
|
||||||
{
|
|
||||||
representers_.destroy();
|
|
||||||
representers_ = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
///Set default _style for scalars. If style is $(D ScalarStyle.Invalid), the _style is chosen automatically.
|
///Set default _style for scalars. If style is $(D ScalarStyle.Invalid), the _style is chosen automatically.
|
||||||
@property void defaultScalarStyle(ScalarStyle style) pure @safe nothrow
|
@property void defaultScalarStyle(ScalarStyle style) pure @safe nothrow
|
||||||
{
|
{
|
||||||
|
|
|
@ -68,13 +68,6 @@ final class Resolver
|
||||||
if(defaultImplicitResolvers){addImplicitResolvers();}
|
if(defaultImplicitResolvers){addImplicitResolvers();}
|
||||||
}
|
}
|
||||||
|
|
||||||
///Destroy the Resolver.
|
|
||||||
~this() pure @safe nothrow
|
|
||||||
{
|
|
||||||
yamlImplicitResolvers_.destroy();
|
|
||||||
yamlImplicitResolvers_ = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Add an implicit scalar resolver.
|
* Add an implicit scalar resolver.
|
||||||
*
|
*
|
||||||
|
|
|
@ -20,11 +20,10 @@ import std.exception;
|
||||||
import std.string;
|
import std.string;
|
||||||
import std.typecons;
|
import std.typecons;
|
||||||
import std.traits : Unqual;
|
import std.traits : Unqual;
|
||||||
|
import std.utf;
|
||||||
|
|
||||||
import dyaml.fastcharsearch;
|
|
||||||
import dyaml.escapes;
|
import dyaml.escapes;
|
||||||
import dyaml.exception;
|
import dyaml.exception;
|
||||||
import dyaml.nogcutil;
|
|
||||||
import dyaml.queue;
|
import dyaml.queue;
|
||||||
import dyaml.reader;
|
import dyaml.reader;
|
||||||
import dyaml.style;
|
import dyaml.style;
|
||||||
|
@ -53,6 +52,21 @@ package:
|
||||||
/// TAG(value)
|
/// TAG(value)
|
||||||
/// SCALAR(value, plain, style)
|
/// SCALAR(value, plain, style)
|
||||||
|
|
||||||
|
alias isBreak = among!('\0', '\n', '\r', '\u0085', '\u2028', '\u2029');
|
||||||
|
|
||||||
|
alias isBreakOrSpace = among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029');
|
||||||
|
|
||||||
|
alias isWhiteSpace = among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029');
|
||||||
|
|
||||||
|
alias isNonScalarStartCharacter = among!('-', '?', ':', ',', '[', ']', '{', '}', '#', '&', '*', '!', '|', '>', '\'', '"', '%', '@', '`', ' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029');
|
||||||
|
|
||||||
|
alias isURIChar = among!('-', ';', '/', '?', ':', '@', '&', '=', '+', '$', ',', '_', '.', '!', '~', '*', '\'', '(', ')', '[', ']', '%');
|
||||||
|
|
||||||
|
alias isNSChar = among!(' ', '\n', '\r', '\u0085', '\u2028', '\u2029');
|
||||||
|
|
||||||
|
alias isBChar = among!('\n', '\r', '\u0085', '\u2028', '\u2029');
|
||||||
|
|
||||||
|
alias isFlowScalarBreakSpace = among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029', '\'', '"', '\\');
|
||||||
|
|
||||||
/// Marked exception thrown at scanner errors.
|
/// Marked exception thrown at scanner errors.
|
||||||
///
|
///
|
||||||
|
@ -153,14 +167,6 @@ final class Scanner
|
||||||
/// not.
|
/// not.
|
||||||
char[256] msgBuffer_;
|
char[256] msgBuffer_;
|
||||||
|
|
||||||
/// Used to detect if a character is any whitespace plus '\0'
|
|
||||||
mixin FastCharSearch!" \t\0\n\r\u0085\u2028\u2029"d searchAllWhitespace;
|
|
||||||
/// Used to detect if a character is any line break plus '\0'
|
|
||||||
mixin FastCharSearch!"\0\n\r\u0085\u2028\u2029"d searchAllBreaks;
|
|
||||||
|
|
||||||
/// Avoids compiler confusion of std.algorithm.canFind with FastCharSearch.
|
|
||||||
alias canFind = std.algorithm.canFind;
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
/// Construct a Scanner using specified Reader.
|
/// Construct a Scanner using specified Reader.
|
||||||
this(Reader reader) @safe nothrow
|
this(Reader reader) @safe nothrow
|
||||||
|
@ -170,16 +176,6 @@ final class Scanner
|
||||||
fetchStreamStart();
|
fetchStreamStart();
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Destroy the scanner.
|
|
||||||
~this() @trusted
|
|
||||||
{
|
|
||||||
tokens_.destroy();
|
|
||||||
indents_.destroy();
|
|
||||||
possibleSimpleKeys_.destroy();
|
|
||||||
possibleSimpleKeys_ = null;
|
|
||||||
reader_ = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Check if the next token is one of specified types.
|
/// Check if the next token is one of specified types.
|
||||||
///
|
///
|
||||||
/// If no types are specified, checks if any tokens are left.
|
/// If no types are specified, checks if any tokens are left.
|
||||||
|
@ -235,7 +231,13 @@ final class Scanner
|
||||||
/// Build an error message in msgBuffer_ and return it as a string.
|
/// Build an error message in msgBuffer_ and return it as a string.
|
||||||
string buildMsg(S ...)(S args) @trusted
|
string buildMsg(S ...)(S args) @trusted
|
||||||
{
|
{
|
||||||
return cast(string)msgBuffer_.printNoGC(args);
|
try {
|
||||||
|
return text(args);
|
||||||
|
}
|
||||||
|
catch (Exception)
|
||||||
|
{
|
||||||
|
return "";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Most scanning error messages have the same format; so build them with this
|
/// Most scanning error messages have the same format; so build them with this
|
||||||
|
@ -739,7 +741,7 @@ final class Scanner
|
||||||
tokens_.push(plain);
|
tokens_.push(plain);
|
||||||
}
|
}
|
||||||
|
|
||||||
pure nothrow @nogc:
|
pure:
|
||||||
|
|
||||||
///Check if the next token is DIRECTIVE: ^ '%' ...
|
///Check if the next token is DIRECTIVE: ^ '%' ...
|
||||||
bool checkDirective() @safe
|
bool checkDirective() @safe
|
||||||
|
@ -754,7 +756,7 @@ final class Scanner
|
||||||
return reader_.column == 0 &&
|
return reader_.column == 0 &&
|
||||||
reader_.peekByte() == '-' &&
|
reader_.peekByte() == '-' &&
|
||||||
reader_.prefix(3) == "---" &&
|
reader_.prefix(3) == "---" &&
|
||||||
searchAllWhitespace.canFind(reader_.peek(3));
|
reader_.peek(3).isWhiteSpace;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Check if the next token is DOCUMENT-END: ^ '...' (' '|'\n')
|
/// Check if the next token is DOCUMENT-END: ^ '...' (' '|'\n')
|
||||||
|
@ -764,13 +766,13 @@ final class Scanner
|
||||||
return reader_.column == 0 &&
|
return reader_.column == 0 &&
|
||||||
reader_.peekByte() == '.' &&
|
reader_.peekByte() == '.' &&
|
||||||
reader_.prefix(3) == "..." &&
|
reader_.prefix(3) == "..." &&
|
||||||
searchAllWhitespace.canFind(reader_.peek(3));
|
reader_.peek(3).isWhiteSpace;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Check if the next token is BLOCK-ENTRY: '-' (' '|'\n')
|
/// Check if the next token is BLOCK-ENTRY: '-' (' '|'\n')
|
||||||
bool checkBlockEntry() @safe
|
bool checkBlockEntry() @safe
|
||||||
{
|
{
|
||||||
return searchAllWhitespace.canFind(reader_.peek(1));
|
return !!reader_.peek(1).isWhiteSpace;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Check if the next token is KEY(flow context): '?'
|
/// Check if the next token is KEY(flow context): '?'
|
||||||
|
@ -778,7 +780,7 @@ final class Scanner
|
||||||
/// or KEY(block context): '?' (' '|'\n')
|
/// or KEY(block context): '?' (' '|'\n')
|
||||||
bool checkKey() @safe
|
bool checkKey() @safe
|
||||||
{
|
{
|
||||||
return (flowLevel_ > 0 || searchAllWhitespace.canFind(reader_.peek(1)));
|
return (flowLevel_ > 0 || reader_.peek(1).isWhiteSpace);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Check if the next token is VALUE(flow context): ':'
|
/// Check if the next token is VALUE(flow context): ':'
|
||||||
|
@ -786,7 +788,7 @@ final class Scanner
|
||||||
/// or VALUE(block context): ':' (' '|'\n')
|
/// or VALUE(block context): ':' (' '|'\n')
|
||||||
bool checkValue() @safe
|
bool checkValue() @safe
|
||||||
{
|
{
|
||||||
return flowLevel_ > 0 || searchAllWhitespace.canFind(reader_.peek(1));
|
return flowLevel_ > 0 || reader_.peek(1).isWhiteSpace;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Check if the next token is a plain scalar.
|
/// Check if the next token is a plain scalar.
|
||||||
|
@ -806,13 +808,11 @@ final class Scanner
|
||||||
bool checkPlain() @safe
|
bool checkPlain() @safe
|
||||||
{
|
{
|
||||||
const c = reader_.peek();
|
const c = reader_.peek();
|
||||||
mixin FastCharSearch!"-?:,[]{}#&*!|>\'\"%@` \t\0\n\r\u0085\u2028\u2029"d
|
if(!c.isNonScalarStartCharacter)
|
||||||
searchPlainNotFirstChar;
|
|
||||||
if(!searchPlainNotFirstChar.canFind(c))
|
|
||||||
{
|
{
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return !searchAllWhitespace.canFind(reader_.peek(1)) &&
|
return !reader_.peek(1).isWhiteSpace &&
|
||||||
(c == '-' || (flowLevel_ == 0 && (c == '?' || c == ':')));
|
(c == '-' || (flowLevel_ == 0 && (c == '?' || c == ':')));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -848,7 +848,7 @@ final class Scanner
|
||||||
/// Scan and throw away all characters until next line break.
|
/// Scan and throw away all characters until next line break.
|
||||||
void scanToNextBreak() @safe
|
void scanToNextBreak() @safe
|
||||||
{
|
{
|
||||||
while(!searchAllBreaks.canFind(reader_.peek())) { reader_.forward(); }
|
while(!reader_.peek().isBreak) { reader_.forward(); }
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Scan all characters until next line break.
|
/// Scan all characters until next line break.
|
||||||
|
@ -858,7 +858,7 @@ final class Scanner
|
||||||
void scanToNextBreakToSlice() @safe
|
void scanToNextBreakToSlice() @safe
|
||||||
{
|
{
|
||||||
uint length = 0;
|
uint length = 0;
|
||||||
while(!searchAllBreaks.canFind(reader_.peek(length)))
|
while(!reader_.peek(length).isBreak)
|
||||||
{
|
{
|
||||||
++length;
|
++length;
|
||||||
}
|
}
|
||||||
|
@ -1072,7 +1072,7 @@ final class Scanner
|
||||||
{
|
{
|
||||||
findNextNonSpace();
|
findNextNonSpace();
|
||||||
if(reader_.peekByte() == '#') { scanToNextBreak(); }
|
if(reader_.peekByte() == '#') { scanToNextBreak(); }
|
||||||
if(searchAllBreaks.canFind(reader_.peek()))
|
if(reader_.peek().isBreak)
|
||||||
{
|
{
|
||||||
scanLineBreak();
|
scanLineBreak();
|
||||||
return;
|
return;
|
||||||
|
@ -1106,7 +1106,7 @@ final class Scanner
|
||||||
char[] value = reader_.sliceBuilder.finish();
|
char[] value = reader_.sliceBuilder.finish();
|
||||||
if(error_) { return Token.init; }
|
if(error_) { return Token.init; }
|
||||||
|
|
||||||
if(!searchAllWhitespace.canFind(reader_.peek()) &&
|
if(!reader_.peek().isWhiteSpace &&
|
||||||
!"?:,]}%@"d.canFind(reader_.peekByte()))
|
!"?:,]}%@"d.canFind(reader_.peekByte()))
|
||||||
{
|
{
|
||||||
enum anchorCtx = "While scanning an anchor";
|
enum anchorCtx = "While scanning an anchor";
|
||||||
|
@ -1141,7 +1141,6 @@ final class Scanner
|
||||||
// (slice) we will produce.
|
// (slice) we will produce.
|
||||||
uint handleEnd;
|
uint handleEnd;
|
||||||
|
|
||||||
mixin FastCharSearch!" \0\n\r\u0085\u2028\u2029"d search;
|
|
||||||
if(c == '<')
|
if(c == '<')
|
||||||
{
|
{
|
||||||
reader_.forward(2);
|
reader_.forward(2);
|
||||||
|
@ -1157,7 +1156,7 @@ final class Scanner
|
||||||
}
|
}
|
||||||
reader_.forward();
|
reader_.forward();
|
||||||
}
|
}
|
||||||
else if(searchAllWhitespace.canFind(c))
|
else if(c.isWhiteSpace)
|
||||||
{
|
{
|
||||||
reader_.forward();
|
reader_.forward();
|
||||||
handleEnd = 0;
|
handleEnd = 0;
|
||||||
|
@ -1168,7 +1167,7 @@ final class Scanner
|
||||||
uint length = 1;
|
uint length = 1;
|
||||||
bool useHandle = false;
|
bool useHandle = false;
|
||||||
|
|
||||||
while(!search.canFind(c))
|
while(!c.isBreakOrSpace)
|
||||||
{
|
{
|
||||||
if(c == '!')
|
if(c == '!')
|
||||||
{
|
{
|
||||||
|
@ -1196,7 +1195,7 @@ final class Scanner
|
||||||
if(error_) { return Token.init; }
|
if(error_) { return Token.init; }
|
||||||
}
|
}
|
||||||
|
|
||||||
if(search.canFind(reader_.peek()))
|
if(reader_.peek().isBreakOrSpace)
|
||||||
{
|
{
|
||||||
char[] slice = reader_.sliceBuilder.finish();
|
char[] slice = reader_.sliceBuilder.finish();
|
||||||
return tagToken(startMark, reader_.mark, slice, handleEnd);
|
return tagToken(startMark, reader_.mark, slice, handleEnd);
|
||||||
|
@ -1326,7 +1325,7 @@ final class Scanner
|
||||||
// (which are at the end of the scalar). Otherwise we remove them (end the
|
// (which are at the end of the scalar). Otherwise we remove them (end the
|
||||||
// transaction).
|
// transaction).
|
||||||
if(chomping == Chomping.Keep) { breaksTransaction.commit(); }
|
if(chomping == Chomping.Keep) { breaksTransaction.commit(); }
|
||||||
else { breaksTransaction.__dtor(); }
|
else { breaksTransaction.end(); }
|
||||||
if(chomping != Chomping.Strip && lineBreak != int.max)
|
if(chomping != Chomping.Strip && lineBreak != int.max)
|
||||||
{
|
{
|
||||||
// If chomping is Keep, we keep the line break but the first line break
|
// If chomping is Keep, we keep the line break but the first line break
|
||||||
|
@ -1435,7 +1434,7 @@ final class Scanner
|
||||||
findNextNonSpace();
|
findNextNonSpace();
|
||||||
if(reader_.peekByte()== '#') { scanToNextBreak(); }
|
if(reader_.peekByte()== '#') { scanToNextBreak(); }
|
||||||
|
|
||||||
if(searchAllBreaks.canFind(reader_.peek()))
|
if(reader_.peek().isBreak)
|
||||||
{
|
{
|
||||||
scanLineBreak();
|
scanLineBreak();
|
||||||
return;
|
return;
|
||||||
|
@ -1527,8 +1526,6 @@ final class Scanner
|
||||||
{
|
{
|
||||||
dchar c = reader_.peek();
|
dchar c = reader_.peek();
|
||||||
|
|
||||||
mixin FastCharSearch!" \t\0\n\r\u0085\u2028\u2029\'\"\\"d search;
|
|
||||||
|
|
||||||
size_t numCodePoints = 0;
|
size_t numCodePoints = 0;
|
||||||
// This is an optimized way of writing:
|
// This is an optimized way of writing:
|
||||||
// while(!search.canFind(reader_.peek(numCodePoints))) { ++numCodePoints; }
|
// while(!reader_.peek(numCodePoints).isFlowScalarBreakSpace) { ++numCodePoints; }
|
||||||
|
@ -1546,8 +1543,8 @@ final class Scanner
|
||||||
for(size_t i = oldSliceLength; i < slice.length;)
|
for(size_t i = oldSliceLength; i < slice.length;)
|
||||||
{
|
{
|
||||||
// slice is UTF-8 - need to decode
|
// slice is UTF-8 - need to decode
|
||||||
const ch = slice[i] < 0x80 ? slice[i++] : decodeValidUTF8NoGC(slice, i);
|
const ch = slice[i] < 0x80 ? slice[i++] : decode(slice, i);
|
||||||
if(search.canFind(ch)) { break outer; }
|
if(ch.isFlowScalarBreakSpace) { break outer; }
|
||||||
++numCodePoints;
|
++numCodePoints;
|
||||||
}
|
}
|
||||||
oldSliceLength = slice.length;
|
oldSliceLength = slice.length;
|
||||||
|
@ -1596,14 +1593,15 @@ final class Scanner
|
||||||
char[2] escapeStart = ['\\', cast(char) c];
|
char[2] escapeStart = ['\\', cast(char) c];
|
||||||
reader_.sliceBuilder.write(escapeStart);
|
reader_.sliceBuilder.write(escapeStart);
|
||||||
reader_.sliceBuilder.write(hex);
|
reader_.sliceBuilder.write(hex);
|
||||||
bool overflow;
|
|
||||||
// Note: This is just error checking; Parser does the actual
|
// Note: This is just error checking; Parser does the actual
|
||||||
// escaping (otherwise we could accidentally create an
|
// escaping (otherwise we could accidentally create an
|
||||||
// escape sequence here that wasn't in input, breaking the
|
// escape sequence here that wasn't in input, breaking the
|
||||||
// escaping code in parser, which is in parser because it
|
// escaping code in parser, which is in parser because it
|
||||||
// can't always be done in place)
|
// can't always be done in place)
|
||||||
parseNoGC!int(hex, 16u, overflow);
|
try {
|
||||||
if(overflow)
|
parse!int(hex, 16u);
|
||||||
|
}
|
||||||
|
catch (Exception)
|
||||||
{
|
{
|
||||||
error("While scanning a double quoted scalar", startMark,
|
error("While scanning a double quoted scalar", startMark,
|
||||||
"overflow when parsing an escape sequence of " ~
|
"overflow when parsing an escape sequence of " ~
|
||||||
|
@ -1689,7 +1687,7 @@ final class Scanner
|
||||||
// Instead of checking indentation, we check for document separators.
|
// Instead of checking indentation, we check for document separators.
|
||||||
const prefix = reader_.prefix(3);
|
const prefix = reader_.prefix(3);
|
||||||
if((prefix == "---" || prefix == "...") &&
|
if((prefix == "---" || prefix == "...") &&
|
||||||
searchAllWhitespace.canFind(reader_.peek(3)))
|
reader_.peek(3).isWhiteSpace)
|
||||||
{
|
{
|
||||||
error("While scanning a quoted scalar", startMark,
|
error("While scanning a quoted scalar", startMark,
|
||||||
"found unexpected document separator", reader_.mark);
|
"found unexpected document separator", reader_.mark);
|
||||||
|
@ -1741,8 +1739,8 @@ final class Scanner
|
||||||
for(;;)
|
for(;;)
|
||||||
{
|
{
|
||||||
const cNext = reader_.peek(length + 1);
|
const cNext = reader_.peek(length + 1);
|
||||||
if(searchAllWhitespace.canFind(c) ||
|
if(c.isWhiteSpace ||
|
||||||
(c == ':' && searchAllWhitespace.canFind(cNext)))
|
(c == ':' && cNext.isWhiteSpace))
|
||||||
{
|
{
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -1755,7 +1753,7 @@ final class Scanner
|
||||||
for(;;)
|
for(;;)
|
||||||
{
|
{
|
||||||
c = reader_.peek(length);
|
c = reader_.peek(length);
|
||||||
if(searchAllWhitespace.canFind(c) || ",:?[]{}"d.canFind(c))
|
if(c.isWhiteSpace || ",:?[]{}"d.canFind(c))
|
||||||
{
|
{
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -1765,7 +1763,7 @@ final class Scanner
|
||||||
|
|
||||||
// It's not clear what we should do with ':' in the flow context.
|
// It's not clear what we should do with ':' in the flow context.
|
||||||
if(flowLevel_ > 0 && c == ':' &&
|
if(flowLevel_ > 0 && c == ':' &&
|
||||||
!searchAllWhitespace.canFind(reader_.peek(length + 1)) &&
|
!reader_.peek(length + 1).isWhiteSpace &&
|
||||||
!",[]{}"d.canFind(reader_.peek(length + 1)))
|
!",[]{}"d.canFind(reader_.peek(length + 1)))
|
||||||
{
|
{
|
||||||
// This is an error; throw the slice away.
|
// This is an error; throw the slice away.
|
||||||
|
@ -1799,7 +1797,7 @@ final class Scanner
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
spacesTransaction.__dtor();
|
spacesTransaction.end();
|
||||||
char[] slice = reader_.sliceBuilder.finish();
|
char[] slice = reader_.sliceBuilder.finish();
|
||||||
|
|
||||||
return scalarToken(startMark, endMark, slice, ScalarStyle.Plain);
|
return scalarToken(startMark, endMark, slice, ScalarStyle.Plain);
|
||||||
|
@ -1821,10 +1819,7 @@ final class Scanner
|
||||||
reader_.forward(length);
|
reader_.forward(length);
|
||||||
|
|
||||||
dchar c = reader_.peek();
|
dchar c = reader_.peek();
|
||||||
mixin FastCharSearch!" \n\r\u0085\u2028\u2029"d search;
|
if(!c.isNSChar)
|
||||||
// No newline after the spaces (if any)
|
|
||||||
// (Excluding ' ' so we can use the same FastCharSearch as below)
|
|
||||||
if(!search.canFind(c) && c != ' ')
|
|
||||||
{
|
{
|
||||||
// We have spaces, but no newline.
|
// We have spaces, but no newline.
|
||||||
if(whitespaces.length > 0) { reader_.sliceBuilder.write(whitespaces); }
|
if(whitespaces.length > 0) { reader_.sliceBuilder.write(whitespaces); }
|
||||||
|
@ -1835,7 +1830,7 @@ final class Scanner
|
||||||
const lineBreak = scanLineBreak();
|
const lineBreak = scanLineBreak();
|
||||||
allowSimpleKey_ = true;
|
allowSimpleKey_ = true;
|
||||||
|
|
||||||
static bool end(Reader reader_) @safe pure nothrow @nogc
|
static bool end(Reader reader_) @safe pure
|
||||||
{
|
{
|
||||||
const prefix = reader_.prefix(3);
|
const prefix = reader_.prefix(3);
|
||||||
return ("---" == prefix || "..." == prefix)
|
return ("---" == prefix || "..." == prefix)
|
||||||
|
@ -1849,7 +1844,7 @@ final class Scanner
|
||||||
alias Transaction = SliceBuilder.Transaction;
|
alias Transaction = SliceBuilder.Transaction;
|
||||||
auto transaction = Transaction(&reader_.sliceBuilder);
|
auto transaction = Transaction(&reader_.sliceBuilder);
|
||||||
if(lineBreak != '\n') { reader_.sliceBuilder.write(lineBreak); }
|
if(lineBreak != '\n') { reader_.sliceBuilder.write(lineBreak); }
|
||||||
while(search.canFind(reader_.peek()))
|
while(reader_.peek().isNSChar)
|
||||||
{
|
{
|
||||||
if(reader_.peekByte() == ' ') { reader_.forward(); }
|
if(reader_.peekByte() == ' ') { reader_.forward(); }
|
||||||
else
|
else
|
||||||
|
@ -1917,8 +1912,7 @@ final class Scanner
|
||||||
const startLen = reader_.sliceBuilder.length;
|
const startLen = reader_.sliceBuilder.length;
|
||||||
{
|
{
|
||||||
uint length = 0;
|
uint length = 0;
|
||||||
mixin FastCharSearch!"-;/?:@&=+$,_.!~*\'()[]%"d search;
|
while(c.isAlphaNum || c.isURIChar)
|
||||||
while(c.isAlphaNum || search.canFind(c))
|
|
||||||
{
|
{
|
||||||
if(c == '%')
|
if(c == '%')
|
||||||
{
|
{
|
||||||
|
@ -1978,9 +1972,7 @@ final class Scanner
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
const decoded = decodeUTF8NoGC!(No.validated)(bytes[], nextChar);
|
c = decode(bytes[], nextChar);
|
||||||
if(decoded.errorMessage !is null) { return size_t.max; }
|
|
||||||
c = decoded.decoded;
|
|
||||||
}
|
}
|
||||||
reader_.sliceBuilder.write(c);
|
reader_.sliceBuilder.write(c);
|
||||||
if(bytes.length - nextChar > 0)
|
if(bytes.length - nextChar > 0)
|
||||||
|
|
|
@ -85,12 +85,6 @@ struct Serializer
|
||||||
~this() @safe
|
~this() @safe
|
||||||
{
|
{
|
||||||
emitter_.emit(streamEndEvent(Mark(), Mark()));
|
emitter_.emit(streamEndEvent(Mark(), Mark()));
|
||||||
YAMLVersion_.destroy();
|
|
||||||
YAMLVersion_ = null;
|
|
||||||
serializedNodes_.destroy();
|
|
||||||
serializedNodes_ = null;
|
|
||||||
anchors_.destroy();
|
|
||||||
anchors_ = null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
///Serialize a node, emitting it in the process.
|
///Serialize a node, emitting it in the process.
|
||||||
|
|
|
@ -1,161 +0,0 @@
|
||||||
// Copyright Ferdinand Majerech 2014.
|
|
||||||
// Distributed under the Boost Software License, Version 1.0.
|
|
||||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
|
||||||
// http://www.boost.org/LICENSE_1_0.txt)
|
|
||||||
|
|
||||||
|
|
||||||
// Code that is currently unused but may be useful for future D:YAML releases
|
|
||||||
module dyaml.unused;
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
import std.utf;
|
|
||||||
|
|
||||||
import tinyendian;
|
|
||||||
|
|
||||||
// Decode a UTF-8/16/32 buffer to UTF-32 (for UTF-32 this does nothing).
|
|
||||||
//
|
|
||||||
// Params:
|
|
||||||
//
|
|
||||||
// input = The UTF-8/16/32 buffer to decode.
|
|
||||||
// encoding = Encoding of input.
|
|
||||||
//
|
|
||||||
// Returns:
|
|
||||||
//
|
|
||||||
// A struct with the following members:
|
|
||||||
//
|
|
||||||
// $(D string errorMessage) In case of a decoding error, the error message is stored
|
|
||||||
// here. If there was no error, errorMessage is NULL. Always
|
|
||||||
// check this first before using the other members.
|
|
||||||
// $(D dchar[] decoded) A GC-allocated buffer with decoded UTF-32 characters.
|
|
||||||
auto decodeUTF(ubyte[] input, UTFEncoding encoding) @safe pure nothrow
|
|
||||||
{
|
|
||||||
// Documented in function ddoc.
|
|
||||||
struct Result
|
|
||||||
{
|
|
||||||
string errorMessage;
|
|
||||||
dchar[] decoded;
|
|
||||||
}
|
|
||||||
|
|
||||||
Result result;
|
|
||||||
|
|
||||||
// Decode input_ if it's encoded as UTF-8 or UTF-16.
|
|
||||||
//
|
|
||||||
// Params:
|
|
||||||
//
|
|
||||||
// buffer = The input buffer to decode.
|
|
||||||
// result = A Result struct to put decoded result and any error messages to.
|
|
||||||
//
|
|
||||||
// On error, result.errorMessage will be set.
|
|
||||||
static void decode(C)(C[] input, ref Result result)
|
|
||||||
{
|
|
||||||
// End of part of input that contains complete characters that can be decoded.
|
|
||||||
const size_t end = endOfLastUTFSequence(input);
|
|
||||||
// If end is 0, there are no full chars.
|
|
||||||
// This can happen at the end of file if there is an incomplete UTF sequence.
|
|
||||||
if(end < input.length)
|
|
||||||
{
|
|
||||||
result.errorMessage = "Invalid UTF character at the end of input";
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
const srclength = input.length;
|
|
||||||
try for(size_t srcpos = 0; srcpos < srclength;)
|
|
||||||
{
|
|
||||||
const c = input[srcpos];
|
|
||||||
if(c < 0x80)
|
|
||||||
{
|
|
||||||
result.decoded ~= c;
|
|
||||||
++srcpos;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
result.decoded ~= std.utf.decode(input, srcpos);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
catch(UTFException e)
|
|
||||||
{
|
|
||||||
result.errorMessage = e.msg;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
catch(Exception e)
|
|
||||||
{
|
|
||||||
assert(false, "Unexpected exception in decode(): " ~ e.msg);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
final switch(encoding)
|
|
||||||
{
|
|
||||||
case UTFEncoding.UTF_8: decode(cast(char[])input, result); break;
|
|
||||||
case UTFEncoding.UTF_16:
|
|
||||||
assert(input.length % 2 == 0, "UTF-16 buffer size must be even");
|
|
||||||
decode(cast(wchar[])input, result);
|
|
||||||
break;
|
|
||||||
case UTFEncoding.UTF_32:
|
|
||||||
assert(input.length % 4 == 0,
|
|
||||||
"UTF-32 buffer size must be a multiple of 4");
|
|
||||||
// No need to decode anything
|
|
||||||
result.decoded = cast(dchar[])input;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if(result.errorMessage !is null) { return result; }
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// Determine the end of last UTF-8 or UTF-16 sequence in a raw buffer.
|
|
||||||
size_t endOfLastUTFSequence(C)(const C[] buffer)
|
|
||||||
{
|
|
||||||
static if(is(C == char))
|
|
||||||
{
|
|
||||||
for(long end = buffer.length - 1; end >= 0; --end)
|
|
||||||
{
|
|
||||||
const stride = utf8Stride[buffer[cast(size_t)end]];
|
|
||||||
if(stride != 0xFF)
|
|
||||||
{
|
|
||||||
// If stride goes beyond end of the buffer, return end.
|
|
||||||
// Otherwise the last sequence ends at buffer.length, so we can
|
|
||||||
// return that. (Unless there is an invalid code unit, which is
|
|
||||||
// caught at decoding)
|
|
||||||
return (stride > buffer.length - end) ? cast(size_t)end : buffer.length;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
else static if(is(C == wchar))
|
|
||||||
{
|
|
||||||
// TODO this is O(N), which is slow. Find out if we can somehow go
|
|
||||||
// from the end backwards with UTF-16.
|
|
||||||
size_t end = 0;
|
|
||||||
while(end < buffer.length)
|
|
||||||
{
|
|
||||||
const s = stride(buffer, end);
|
|
||||||
if(s + end > buffer.length) { break; }
|
|
||||||
end += s;
|
|
||||||
}
|
|
||||||
return end;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// UTF-8 codepoint strides (0xFF are codepoints that can't start a sequence).
|
|
||||||
immutable ubyte[256] utf8Stride =
|
|
||||||
[
|
|
||||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
||||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
||||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
||||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
||||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
||||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
||||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
||||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
||||||
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
|
|
||||||
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
|
|
||||||
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
|
|
||||||
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
|
|
||||||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
|
||||||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
|
||||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
|
|
||||||
4,4,4,4,4,4,4,4,5,5,5,5,6,6,0xFF,0xFF,
|
|
||||||
];
|
|
Loading…
Reference in a new issue