Optimized dumping performance. Dumping is now about half as fast
as loading. Also slightly improved loading performance. Greatly improved performance with very small files. We can now load 10000 24-byte files in a second, and dump them at the same speed. Fixed another FastCharSearch bug.
This commit is contained in:
parent
8b995e5061
commit
62f7e2e4df
|
@ -113,6 +113,18 @@ struct Dumper
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
///Resolver used by default.
|
||||||
|
static Resolver defaultResolver_;
|
||||||
|
///Representer used by default.
|
||||||
|
static Representer defaultRepresenter_;
|
||||||
|
|
||||||
|
static this()
|
||||||
|
{
|
||||||
|
defaultResolver_ = new Resolver;
|
||||||
|
defaultRepresenter_ = new Representer;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
///Resolver to resolve tags.
|
///Resolver to resolve tags.
|
||||||
Resolver resolver_;
|
Resolver resolver_;
|
||||||
///Representer to represent data types.
|
///Representer to represent data types.
|
||||||
|
@ -167,8 +179,8 @@ struct Dumper
|
||||||
///Construct a Dumper writing to a _stream. This is useful to e.g. write to memory.
|
///Construct a Dumper writing to a _stream. This is useful to e.g. write to memory.
|
||||||
this(Stream stream)
|
this(Stream stream)
|
||||||
{
|
{
|
||||||
resolver_ = new Resolver();
|
resolver_ = defaultResolver_;
|
||||||
representer_ = new Representer();
|
representer_ = defaultRepresenter_;
|
||||||
stream_ = stream;
|
stream_ = stream;
|
||||||
Anchor.addReference();
|
Anchor.addReference();
|
||||||
TagDirectives.addReference();
|
TagDirectives.addReference();
|
||||||
|
|
|
@ -30,6 +30,7 @@ import dyaml.encoding;
|
||||||
import dyaml.escapes;
|
import dyaml.escapes;
|
||||||
import dyaml.event;
|
import dyaml.event;
|
||||||
import dyaml.exception;
|
import dyaml.exception;
|
||||||
|
import dyaml.fastcharsearch;
|
||||||
import dyaml.flags;
|
import dyaml.flags;
|
||||||
import dyaml.linebreak;
|
import dyaml.linebreak;
|
||||||
import dyaml.queue;
|
import dyaml.queue;
|
||||||
|
@ -63,6 +64,9 @@ align(4) struct ScalarAnalysis
|
||||||
"allowSingleQuoted", "allowDoubleQuoted", "allowBlock", "isNull") flags;
|
"allowSingleQuoted", "allowDoubleQuoted", "allowBlock", "isNull") flags;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
///Quickly determines if a character is a newline.
|
||||||
|
private mixin FastCharSearch!"\n\u0085\u2028\u2029"d newlineSearch_;
|
||||||
|
|
||||||
//Emits YAML events into a file/stream.
|
//Emits YAML events into a file/stream.
|
||||||
struct Emitter
|
struct Emitter
|
||||||
{
|
{
|
||||||
|
@ -365,10 +369,13 @@ struct Emitter
|
||||||
|
|
||||||
bool eq(ref tagDirective a, ref tagDirective b){return a.handle == b.handle;}
|
bool eq(ref tagDirective a, ref tagDirective b){return a.handle == b.handle;}
|
||||||
//Add any default tag directives that have not been overridden.
|
//Add any default tag directives that have not been overridden.
|
||||||
foreach(ref def; defaultTagDirectives_) if(!canFind!eq(tagDirectives_, def))
|
foreach(ref def; defaultTagDirectives_)
|
||||||
|
{
|
||||||
|
if(!std.algorithm.canFind!eq(tagDirectives_, def))
|
||||||
{
|
{
|
||||||
tagDirectives_ ~= def;
|
tagDirectives_ ~= def;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const implicit = first && !event_.explicitDocument && !canonical_ &&
|
const implicit = first && !event_.explicitDocument && !canonical_ &&
|
||||||
YAMLVersion is null && tagDirectives.isNull() &&
|
YAMLVersion is null && tagDirectives.isNull() &&
|
||||||
|
@ -1017,15 +1024,18 @@ struct Emitter
|
||||||
|
|
||||||
foreach(const size_t index, const dchar c; scalar)
|
foreach(const size_t index, const dchar c; scalar)
|
||||||
{
|
{
|
||||||
|
mixin FastCharSearch!("#,[]{}&*!|>\'\"%@`"d, 128) specialCharSearch;
|
||||||
|
mixin FastCharSearch!(",?[]{}"d, 128) flowIndicatorSearch;
|
||||||
|
|
||||||
//Check for indicators.
|
//Check for indicators.
|
||||||
if(index == 0)
|
if(index == 0)
|
||||||
{
|
{
|
||||||
//Leading indicators are special characters.
|
//Leading indicators are special characters.
|
||||||
if("#,[]{}&*!|>\'\"%@`"d.canFind(c))
|
if(specialCharSearch.canFind(c))
|
||||||
{
|
{
|
||||||
flowIndicators = blockIndicators = true;
|
flowIndicators = blockIndicators = true;
|
||||||
}
|
}
|
||||||
if("?:"d.canFind(c))
|
if(':' == c || '?' == c)
|
||||||
{
|
{
|
||||||
flowIndicators = true;
|
flowIndicators = true;
|
||||||
if(followedByWhitespace){blockIndicators = true;}
|
if(followedByWhitespace){blockIndicators = true;}
|
||||||
|
@ -1038,7 +1048,7 @@ struct Emitter
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
//Some indicators cannot appear within a scalar as well.
|
//Some indicators cannot appear within a scalar as well.
|
||||||
if(",?[]{}"d.canFind(c)){flowIndicators = true;}
|
if(flowIndicatorSearch.canFind(c)){flowIndicators = true;}
|
||||||
if(c == ':')
|
if(c == ':')
|
||||||
{
|
{
|
||||||
flowIndicators = true;
|
flowIndicators = true;
|
||||||
|
@ -1051,7 +1061,7 @@ struct Emitter
|
||||||
}
|
}
|
||||||
|
|
||||||
//Check for line breaks, special, and unicode characters.
|
//Check for line breaks, special, and unicode characters.
|
||||||
if("\n\u0085\u2028\u2029"d.canFind(c)){lineBreaks = true;}
|
if(newlineSearch_.canFind(c)){lineBreaks = true;}
|
||||||
if(!(c == '\n' || (c >= '\x20' && c <= '\x7E')) &&
|
if(!(c == '\n' || (c >= '\x20' && c <= '\x7E')) &&
|
||||||
!((c == '\u0085' || (c >= '\xA0' && c <= '\uD7FF') ||
|
!((c == '\u0085' || (c >= '\xA0' && c <= '\uD7FF') ||
|
||||||
(c >= '\uE000' && c <= '\uFFFD')) && c != '\uFEFF'))
|
(c >= '\uE000' && c <= '\uFFFD')) && c != '\uFEFF'))
|
||||||
|
@ -1068,7 +1078,7 @@ struct Emitter
|
||||||
previousSpace = true;
|
previousSpace = true;
|
||||||
previousBreak = false;
|
previousBreak = false;
|
||||||
}
|
}
|
||||||
else if("\n\u0085\u2028\u2029"d.canFind(c))
|
else if(newlineSearch_.canFind(c))
|
||||||
{
|
{
|
||||||
if(index == 0){leadingBreak = true;}
|
if(index == 0){leadingBreak = true;}
|
||||||
if(index == scalar.length - 1){trailingBreak = true;}
|
if(index == scalar.length - 1){trailingBreak = true;}
|
||||||
|
@ -1081,10 +1091,11 @@ struct Emitter
|
||||||
previousSpace = previousBreak = false;
|
previousSpace = previousBreak = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
mixin FastCharSearch! "\0\n\r\u0085\u2028\u2029 \t"d spaceSearch;
|
||||||
//Prepare for the next character.
|
//Prepare for the next character.
|
||||||
preceededByWhitespace = "\0\n\r\u0085\u2028\u2029 \t"d.canFind(c);
|
preceededByWhitespace = spaceSearch.canFind(c);
|
||||||
followedByWhitespace = index + 2 >= scalar.length ||
|
followedByWhitespace = index + 2 >= scalar.length ||
|
||||||
"\0\n\r\u0085\u2028\u2029 \t"d.canFind(scalar[index + 2]);
|
spaceSearch.canFind(scalar[index + 2]);
|
||||||
}
|
}
|
||||||
|
|
||||||
with(analysis.flags)
|
with(analysis.flags)
|
||||||
|
@ -1310,14 +1321,14 @@ struct ScalarWriter
|
||||||
}
|
}
|
||||||
else if(breaks_)
|
else if(breaks_)
|
||||||
{
|
{
|
||||||
if(!"\n\u0085\u2028\u2029"d.canFind(c))
|
if(!newlineSearch_.canFind(c))
|
||||||
{
|
{
|
||||||
writeStartLineBreak();
|
writeStartLineBreak();
|
||||||
writeLineBreaks();
|
writeLineBreaks();
|
||||||
emitter_.writeIndent();
|
emitter_.writeIndent();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if((c == dcharNone || "\' \n\u0085\u2028\u2029"d.canFind(c))
|
else if((c == dcharNone || "\' "d.canFind(c) || newlineSearch_.canFind(c))
|
||||||
&& startChar_ < endChar_)
|
&& startChar_ < endChar_)
|
||||||
{
|
{
|
||||||
writeCurrentRange(Flag!"UpdateColumn".yes);
|
writeCurrentRange(Flag!"UpdateColumn".yes);
|
||||||
|
@ -1410,7 +1421,7 @@ struct ScalarWriter
|
||||||
const dchar c = nextChar();
|
const dchar c = nextChar();
|
||||||
if(breaks_)
|
if(breaks_)
|
||||||
{
|
{
|
||||||
if(!"\n\u0085\u2028\u2029"d.canFind(c))
|
if(!newlineSearch_.canFind(c))
|
||||||
{
|
{
|
||||||
if(!leadingSpace && c != dcharNone && c != ' ')
|
if(!leadingSpace && c != dcharNone && c != ' ')
|
||||||
{
|
{
|
||||||
|
@ -1433,7 +1444,7 @@ struct ScalarWriter
|
||||||
writeCurrentRange(Flag!"UpdateColumn".yes);
|
writeCurrentRange(Flag!"UpdateColumn".yes);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if(c == dcharNone || " \n\u0085\u2028\u2029"d.canFind(c))
|
else if(c == dcharNone || newlineSearch_.canFind(c) || c == ' ')
|
||||||
{
|
{
|
||||||
writeCurrentRange(Flag!"UpdateColumn".yes);
|
writeCurrentRange(Flag!"UpdateColumn".yes);
|
||||||
if(c == dcharNone){emitter_.writeLineBreak();}
|
if(c == dcharNone){emitter_.writeLineBreak();}
|
||||||
|
@ -1454,13 +1465,13 @@ struct ScalarWriter
|
||||||
const dchar c = nextChar();
|
const dchar c = nextChar();
|
||||||
if(breaks_)
|
if(breaks_)
|
||||||
{
|
{
|
||||||
if(!"\n\u0085\u2028\u2029"d.canFind(c))
|
if(!newlineSearch_.canFind(c))
|
||||||
{
|
{
|
||||||
writeLineBreaks();
|
writeLineBreaks();
|
||||||
if(c != dcharNone){emitter_.writeIndent();}
|
if(c != dcharNone){emitter_.writeIndent();}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if(c == dcharNone || "\n\u0085\u2028\u2029"d.canFind(c))
|
else if(c == dcharNone || newlineSearch_.canFind(c))
|
||||||
{
|
{
|
||||||
writeCurrentRange(Flag!"UpdateColumn".no);
|
writeCurrentRange(Flag!"UpdateColumn".no);
|
||||||
if(c == dcharNone){emitter_.writeLineBreak();}
|
if(c == dcharNone){emitter_.writeLineBreak();}
|
||||||
|
@ -1500,14 +1511,14 @@ struct ScalarWriter
|
||||||
}
|
}
|
||||||
else if(breaks_)
|
else if(breaks_)
|
||||||
{
|
{
|
||||||
if(!"\n\u0085\u2028\u2029"d.canFind(c))
|
if(!newlineSearch_.canFind(c))
|
||||||
{
|
{
|
||||||
writeStartLineBreak();
|
writeStartLineBreak();
|
||||||
writeLineBreaks();
|
writeLineBreaks();
|
||||||
writeIndent(Flag!"ResetSpace".yes);
|
writeIndent(Flag!"ResetSpace".yes);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if(c == dcharNone || " \n\u0085\u2028\u2029"d.canFind(c))
|
else if(c == dcharNone || newlineSearch_.canFind(c) || c == ' ')
|
||||||
{
|
{
|
||||||
writeCurrentRange(Flag!"UpdateColumn".yes);
|
writeCurrentRange(Flag!"UpdateColumn".yes);
|
||||||
}
|
}
|
||||||
|
@ -1521,8 +1532,15 @@ struct ScalarWriter
|
||||||
{
|
{
|
||||||
++endChar_;
|
++endChar_;
|
||||||
endByte_ = nextEndByte_;
|
endByte_ = nextEndByte_;
|
||||||
return endByte_ < text_.length ? decode(text_, nextEndByte_)
|
if(endByte_ >= text_.length){return dcharNone;}
|
||||||
: dcharNone;
|
const c = text_[nextEndByte_];
|
||||||
|
//c is ascii, no need to decode.
|
||||||
|
if(c < 0x80)
|
||||||
|
{
|
||||||
|
++nextEndByte_;
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
return decode(text_, nextEndByte_);
|
||||||
}
|
}
|
||||||
|
|
||||||
///Get character at start of the text range.
|
///Get character at start of the text range.
|
||||||
|
@ -1555,16 +1573,15 @@ struct ScalarWriter
|
||||||
const last = lastChar(text_, end);
|
const last = lastChar(text_, end);
|
||||||
const secondLast = end > 0 ? lastChar(text_, end) : 0;
|
const secondLast = end > 0 ? lastChar(text_, end) : 0;
|
||||||
|
|
||||||
if(" \n\u0085\u2028\u2029"d.canFind(text_[0]))
|
if(newlineSearch_.canFind(text_[0]) || text_[0] == ' ')
|
||||||
{
|
{
|
||||||
hints[hintsIdx++] = cast(char)('0' + bestIndent);
|
hints[hintsIdx++] = cast(char)('0' + bestIndent);
|
||||||
}
|
}
|
||||||
if(!"\n\u0085\u2028\u2029"d.canFind(last))
|
if(!newlineSearch_.canFind(last))
|
||||||
{
|
{
|
||||||
hints[hintsIdx++] = '-';
|
hints[hintsIdx++] = '-';
|
||||||
}
|
}
|
||||||
else if(std.utf.count(text_) == 1 ||
|
else if(std.utf.count(text_) == 1 || newlineSearch_.canFind(secondLast))
|
||||||
"\n\u0085\u2028\u2029"d.canFind(secondLast))
|
|
||||||
{
|
{
|
||||||
hints[hintsIdx++] = '+';
|
hints[hintsIdx++] = '+';
|
||||||
}
|
}
|
||||||
|
@ -1636,7 +1653,7 @@ struct ScalarWriter
|
||||||
void updateBreaks(in dchar c, in Flag!"UpdateSpaces" updateSpaces)
|
void updateBreaks(in dchar c, in Flag!"UpdateSpaces" updateSpaces)
|
||||||
{
|
{
|
||||||
if(c == dcharNone){return;}
|
if(c == dcharNone){return;}
|
||||||
breaks_ = "\n\u0085\u2028\u2029"d.canFind(c);
|
breaks_ = newlineSearch_.canFind(c);
|
||||||
if(updateSpaces){spaces_ = c == ' ';}
|
if(updateSpaces){spaces_ = c == ' ';}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -89,7 +89,7 @@ string searchCode(dstring chars, uint tableSize)()
|
||||||
|
|
||||||
code ~= specialChars.length
|
code ~= specialChars.length
|
||||||
? " return " ~ specialCharsCode() ~ ";\n"
|
? " return " ~ specialCharsCode() ~ ";\n"
|
||||||
: " return false";
|
: " return false;";
|
||||||
code ~= "}\n";
|
code ~= "}\n";
|
||||||
|
|
||||||
return code;
|
return code;
|
||||||
|
|
|
@ -90,6 +90,18 @@ import dyaml.token;
|
||||||
struct Loader
|
struct Loader
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
|
///Resolver used by default.
|
||||||
|
static Resolver defaultResolver_;
|
||||||
|
///Constructor used by default.
|
||||||
|
static Constructor defaultConstructor_;
|
||||||
|
|
||||||
|
static this()
|
||||||
|
{
|
||||||
|
defaultResolver_ = new Resolver;
|
||||||
|
defaultConstructor_ = new Constructor;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
///Reads character data from a stream.
|
///Reads character data from a stream.
|
||||||
Reader reader_;
|
Reader reader_;
|
||||||
///Processes character data to YAML tokens.
|
///Processes character data to YAML tokens.
|
||||||
|
@ -138,8 +150,8 @@ struct Loader
|
||||||
reader_ = new Reader(stream);
|
reader_ = new Reader(stream);
|
||||||
scanner_ = new Scanner(reader_);
|
scanner_ = new Scanner(reader_);
|
||||||
parser_ = new Parser(scanner_);
|
parser_ = new Parser(scanner_);
|
||||||
resolver_ = new Resolver;
|
resolver_ = defaultResolver_;
|
||||||
constructor_ = new Constructor;
|
constructor_ = defaultConstructor_;
|
||||||
Anchor.addReference();
|
Anchor.addReference();
|
||||||
TagDirectives.addReference();
|
TagDirectives.addReference();
|
||||||
}
|
}
|
||||||
|
|
|
@ -148,10 +148,12 @@ final class Reader
|
||||||
* or if invalid data is read.
|
* or if invalid data is read.
|
||||||
*/
|
*/
|
||||||
dchar peek(in size_t index = 0)
|
dchar peek(in size_t index = 0)
|
||||||
|
{
|
||||||
|
if(buffer_.length <= bufferOffset_ + index + 1)
|
||||||
{
|
{
|
||||||
updateBuffer(index + 1);
|
updateBuffer(index + 1);
|
||||||
|
}
|
||||||
if(buffer_.length < bufferOffset_ + index + 1)
|
if(buffer_.length <= bufferOffset_ + index)
|
||||||
{
|
{
|
||||||
throw new ReaderException("Trying to read past the end of the stream");
|
throw new ReaderException("Trying to read past the end of the stream");
|
||||||
}
|
}
|
||||||
|
@ -175,7 +177,10 @@ final class Reader
|
||||||
dstring prefix(in size_t length)
|
dstring prefix(in size_t length)
|
||||||
{
|
{
|
||||||
if(length == 0){return "";}
|
if(length == 0){return "";}
|
||||||
|
if(buffer_.length <= bufferOffset_ + length)
|
||||||
|
{
|
||||||
updateBuffer(length);
|
updateBuffer(length);
|
||||||
|
}
|
||||||
const end = min(buffer_.length, bufferOffset_ + length);
|
const end = min(buffer_.length, bufferOffset_ + length);
|
||||||
//need to duplicate as we change buffer content with C functions
|
//need to duplicate as we change buffer content with C functions
|
||||||
//and could end up with returned string referencing changed data
|
//and could end up with returned string referencing changed data
|
||||||
|
@ -225,7 +230,11 @@ final class Reader
|
||||||
void forward(size_t length = 1)
|
void forward(size_t length = 1)
|
||||||
{
|
{
|
||||||
mixin FastCharSearch!"\n\u0085\u2028\u2029"d search;
|
mixin FastCharSearch!"\n\u0085\u2028\u2029"d search;
|
||||||
|
|
||||||
|
if(buffer_.length <= bufferOffset_ + length + 1)
|
||||||
|
{
|
||||||
updateBuffer(length + 1);
|
updateBuffer(length + 1);
|
||||||
|
}
|
||||||
|
|
||||||
while(length > 0)
|
while(length > 0)
|
||||||
{
|
{
|
||||||
|
@ -272,8 +281,6 @@ final class Reader
|
||||||
*/
|
*/
|
||||||
void updateBuffer(in size_t length)
|
void updateBuffer(in size_t length)
|
||||||
{
|
{
|
||||||
if(buffer_.length > bufferOffset_ + length){return;}
|
|
||||||
|
|
||||||
//get rid of unneeded data in the buffer
|
//get rid of unneeded data in the buffer
|
||||||
if(bufferOffset_ > 0)
|
if(bufferOffset_ > 0)
|
||||||
{
|
{
|
||||||
|
|
|
@ -144,8 +144,8 @@ final class Resolver
|
||||||
|
|
||||||
if(kind == NodeID.Scalar)
|
if(kind == NodeID.Scalar)
|
||||||
{
|
{
|
||||||
if(implicit)
|
if(!implicit){return defaultScalarTag_;}
|
||||||
{
|
|
||||||
//Get the first char of the value.
|
//Get the first char of the value.
|
||||||
size_t dummy;
|
size_t dummy;
|
||||||
const dchar first = value.length == 0 ? '\0' : decode(value, dummy);
|
const dchar first = value.length == 0 ? '\0' : decode(value, dummy);
|
||||||
|
@ -153,15 +153,11 @@ final class Resolver
|
||||||
auto resolvers = (first in yamlImplicitResolvers_) is null ?
|
auto resolvers = (first in yamlImplicitResolvers_) is null ?
|
||||||
[] : yamlImplicitResolvers_[first];
|
[] : yamlImplicitResolvers_[first];
|
||||||
|
|
||||||
foreach(resolver; resolvers)
|
|
||||||
{
|
|
||||||
//If regexp matches, return tag.
|
//If regexp matches, return tag.
|
||||||
if(!(match(value, resolver[1]).empty))
|
foreach(resolver; resolvers) if(!(match(value, resolver[1]).empty))
|
||||||
{
|
{
|
||||||
return resolver[0];
|
return resolver[0];
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
return defaultScalarTag_;
|
return defaultScalarTag_;
|
||||||
}
|
}
|
||||||
else if(kind == NodeID.Sequence){return defaultSequenceTag_;}
|
else if(kind == NodeID.Sequence){return defaultSequenceTag_;}
|
||||||
|
|
|
@ -194,8 +194,8 @@ struct Serializer
|
||||||
{
|
{
|
||||||
assert(node.isType!string, "Scalar node type must be string before serialized");
|
assert(node.isType!string, "Scalar node type must be string before serialized");
|
||||||
auto value = node.as!string;
|
auto value = node.as!string;
|
||||||
const Tag detectedTag = resolver_.resolve(NodeID.Scalar, Tag(null), value, true);
|
const detectedTag = resolver_.resolve(NodeID.Scalar, Tag(null), value, true);
|
||||||
const Tag defaultTag = resolver_.resolve(NodeID.Scalar, Tag(null), value, false);
|
const defaultTag = resolver_.resolve(NodeID.Scalar, Tag(null), value, false);
|
||||||
bool isDetected = node.tag_ == detectedTag;
|
bool isDetected = node.tag_ == detectedTag;
|
||||||
bool isDefault = node.tag_ == defaultTag;
|
bool isDefault = node.tag_ == defaultTag;
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue