Optimized GC performance when loading,

greatly decreasing memory usage and
improving speed by 30% or more.
This commit is contained in:
Ferdinand Majerech 2011-10-23 20:17:37 +02:00
parent 915428c8ed
commit 9d68b6fa9a
10 changed files with 191 additions and 136 deletions

View file

@ -29,7 +29,7 @@ links = ../index.html Documentation home
# Source files or patterns to ignore. Supports regexp syntax.
# E.g; To ignore main.d and all source files in the test/ directory,
# you would use: "main.d test/*"
ignore = test/*, examples/*, docsrc/*, autoddoc/*, yaml.d, unittest.d, cdc.d, dyaml/composer.d, dyaml/event.d, dyaml/parser.d, dyaml/reader.d, dyaml/scanner.d, dyaml/token.d, dyaml/util.d, dyaml/anchor.d, dyaml/emitter.d, dyaml/flags.d, dyaml/serializer.d, dyaml/sharedobject.d, dyaml/tag.d, dyaml/tagdirectives.d, dyaml/queue.d
ignore = test/*, examples/*, docsrc/*, autoddoc/*, yaml.d, unittest.d, cdc.d, dyaml/composer.d, dyaml/event.d, dyaml/parser.d, dyaml/reader.d, dyaml/scanner.d, dyaml/token.d, dyaml/util.d, dyaml/anchor.d, dyaml/emitter.d, dyaml/flags.d, dyaml/serializer.d, dyaml/sharedobject.d, dyaml/tag.d, dyaml/tagdirectives.d, dyaml/queue.d, dyaml/escapes.d
[DDOC]
# Command to use to generate the documentation.

View file

@ -26,6 +26,7 @@ import std.utf;
import dyaml.anchor;
import dyaml.encoding;
import dyaml.escapes;
import dyaml.event;
import dyaml.exception;
import dyaml.flags;
@ -1343,23 +1344,6 @@ struct ScalarWriter
///Write text as double quoted scalar.
void writeDoubleQuoted()
{
immutable dchar[dchar] escapeReplacements =
['\0': '0',
'\x07': 'a',
'\x08': 'b',
'\x09': 't',
'\x0A': 'n',
'\x0B': 'v',
'\x0C': 'f',
'\x0D': 'r',
'\x1B': 'e',
'\"': '\"',
'\\': '\\',
'\u0085': 'N',
'\xA0': '_',
'\u2028': 'L',
'\u2029': 'P'];
resetTextPosition();
emitter_.writeIndicator("\"", true);
do
@ -1377,10 +1361,10 @@ struct ScalarWriter
if(c != dcharNone)
{
auto appender = appender!string();
if((c in escapeReplacements) !is null)
if((c in dyaml.escapes.toEscapes) !is null)
{
appender.put('\\');
appender.put(escapeReplacements[c]);
appender.put(dyaml.escapes.toEscapes[c]);
}
else
{

61
dyaml/escapes.d Normal file
View file

@ -0,0 +1,61 @@
// Copyright Ferdinand Majerech 2011.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
module dyaml.escapes;
package:

///Translation table from YAML escapes to dchars.
///
///Keys are the characters that follow the backslash in an escape sequence.
dchar[dchar] fromEscapes;

///Translation table from dchars to YAML escapes.
///
///Values are the characters to write after the backslash.
dchar[dchar] toEscapes;

///Translation table from prefixes of escaped hexadecimal format characters to their lengths.
uint[dchar] escapeHexCodes;

///Module constructor; populates the translation tables declared above.
static this()
{
    //Prefix character -> length of the hexadecimal code that follows it.
    escapeHexCodes = ['x': 2, 'u': 4, 'U': 8];

    //Character to escape -> character of its escape sequence.
    toEscapes = [
        '\0':     '0',
        '\a':     'a',
        '\b':     'b',
        '\t':     't',
        '\n':     'n',
        '\v':     'v',
        '\f':     'f',
        '\r':     'r',
        '\x1B':   'e',
        '\"':     '\"',
        '\\':     '\\',
        '\u0085': 'N',
        '\xA0':   '_',
        '\u2028': 'L',
        '\u2029': 'P'
    ];

    //Character of an escape sequence -> character it stands for.
    //A literal tab and a literal space are also present as keys and
    //translate to themselves.
    fromEscapes = [
        '0':  '\0',
        'a':  '\a',
        'b':  '\b',
        't':  '\t',
        '\t': '\t',
        'n':  '\n',
        'v':  '\v',
        'f':  '\f',
        'r':  '\r',
        'e':  '\x1B',
        ' ':  ' ',
        '\"': '\"',
        '\\': '\\',
        'N':  '\u0085',
        '_':  '\xA0',
        'L':  '\u2028',
        'P':  '\u2029'
    ];
}

View file

@ -183,7 +183,7 @@ Event documentEndEvent(Mark start, Mark end, bool explicit)
* style = Scalar style.
*/
Event scalarEvent(in Mark start, in Mark end, in Anchor anchor, in Tag tag,
in bool[2] implicit, in string value,
in Tuple!(bool, bool) implicit, in string value,
in ScalarStyle style = ScalarStyle.Invalid)
{
return Event(value, start, end, anchor, tag, EventID.Scalar, style, implicit[0],

View file

@ -1371,5 +1371,14 @@ void merge(ref Node.Pair[] pairs, ref Node.Pair toMerge)
*/
void merge(ref Node.Pair[] pairs, Node.Pair[] toMerge)
{
foreach(ref pair; toMerge){merge(pairs, pair);}
bool eq(ref Node.Pair a, ref Node.Pair b){return a.key == b.key;}
//Preallocating to limit GC reallocations.
auto len = pairs.length;
pairs.length = len + toMerge.length;
foreach(ref pair; toMerge) if(!canFind!eq(pairs, pair))
{
pairs[len++] = pair;
}
pairs.length = len;
}

View file

@ -12,6 +12,7 @@ module dyaml.parser;
import std.array;
import std.container;
import std.conv;
import std.exception;
import std.typecons;
@ -124,9 +125,10 @@ final class Parser
tagDirective[] tagHandles_;
///Stack of states.
Event delegate()[] states_;
Array!(Event delegate()) states_;
///Stack of marks used to keep track of extents of e.g. YAML collections.
Mark[] marks_;
Array!Mark marks_;
///Current state.
Event delegate() state_;
@ -136,6 +138,8 @@ final class Parser
{
state_ = &parseStreamStart;
scanner_ = scanner;
states_.reserve(32);
marks_.reserve(32);
}
///Destroy the parser.
@ -145,9 +149,7 @@ final class Parser
clear(tagHandles_);
tagHandles_ = null;
clear(states_);
states_ = null;
clear(marks_);
marks_ = null;
}
/**
@ -228,8 +230,8 @@ final class Parser
{
enforce(states_.length > 0,
new YAMLException("Parser: Need to pop state but no states left to pop"));
const result = states_.back();
states_.popBack;
const result = states_.back;
states_.length = states_.length - 1;
return result;
}
@ -238,8 +240,8 @@ final class Parser
{
enforce(marks_.length > 0,
new YAMLException("Parser: Need to pop mark but no marks left to pop"));
const result = marks_.back();
marks_.popBack;
const result = marks_.back;
marks_.length = marks_.length - 1;
return result;
}
@ -468,7 +470,7 @@ final class Parser
bool implicit_2 = (!implicit) && tag is null;
state_ = popState();
return scalarEvent(startMark, token.endMark, Anchor(anchor), Tag(tag),
[implicit, implicit_2], token.value, token.style);
tuple(implicit, implicit_2), token.value, token.style);
}
if(scanner_.checkToken(TokenID.FlowSequenceStart))
@ -512,7 +514,7 @@ final class Parser
//Empty scalars are allowed even if a tag or an anchor is specified.
return scalarEvent(startMark, endMark, Anchor(anchor), Tag(tag),
[implicit, false] , "");
tuple(implicit, false) , "");
}
immutable token = scanner_.peekToken();
@ -585,7 +587,7 @@ final class Parser
if(!scanner_.checkToken(TokenID.BlockEnd))
{
immutable token = scanner_.peekToken();
throw new Error("While parsing a block collection", marks_[$ - 1],
throw new Error("While parsing a block collection", marks_.back,
"expected block end, but found " ~ token.idString,
token.startMark);
}
@ -650,7 +652,7 @@ final class Parser
if(!scanner_.checkToken(TokenID.BlockEnd))
{
immutable token = scanner_.peekToken();
throw new Error("While parsing a block mapping", marks_[$ - 1],
throw new Error("While parsing a block mapping", marks_.back,
"expected block end, but found: " ~ token.idString,
token.startMark);
}
@ -711,7 +713,7 @@ final class Parser
else
{
immutable token = scanner_.peekToken;
throw new Error("While parsing a flow sequence", marks_[$ - 1],
throw new Error("While parsing a flow sequence", marks_.back,
"expected ',' or ']', but got: " ~
token.idString, token.startMark);
}
@ -818,7 +820,7 @@ final class Parser
else
{
immutable token = scanner_.peekToken;
throw new Error("While parsing a flow mapping", marks_[$ - 1],
throw new Error("While parsing a flow mapping", marks_.back,
"expected ',' or '}', but got: " ~
token.idString, token.startMark);
}
@ -860,6 +862,6 @@ final class Parser
{
//PyYAML uses a Tuple!(true, false) for the second last arg here,
//but the second bool is never used after that - so we don't use it.
return scalarEvent(mark, mark, Anchor(), Tag(), [true, false], "");
return scalarEvent(mark, mark, Anchor(), Tag(), tuple(true, false), "");
}
}

View file

@ -138,8 +138,11 @@ final class Reader
{
updateBuffer(index + 1);
enforce(buffer_.length >= bufferOffset_ + index + 1,
new ReaderException("Trying to read past the end of the stream"));
if(buffer_.length < bufferOffset_ + index + 1)
{
throw new ReaderException("Trying to read past the end of the stream");
}
return buffer_[bufferOffset_ + index];
}
@ -205,6 +208,8 @@ final class Reader
*/
void forward(size_t length = 1)
{
//This is here due to optimization.
static newlines = "\n\u0085\u2028\u2029";
updateBuffer(length + 1);
while(length > 0)
@ -212,9 +217,8 @@ final class Reader
const c = buffer_[bufferOffset_];
++bufferOffset_;
++charIndex_;
//new line
if(['\n', '\u0085', '\u2028', '\u2029'].canFind(c) ||
(c == '\r' && buffer_[bufferOffset_] != '\n'))
//New line.
if(newlines.canFind(c) || (c == '\r' && buffer_[bufferOffset_] != '\n'))
{
++line_;
column_ = 0;
@ -246,7 +250,7 @@ final class Reader
* If there are not enough characters in the stream, it will get
* as many as possible.
*
* Params: length = Number of characters we need to read.
* Params: length = Minimum number of characters we need to read.
*
* Throws: ReaderException if trying to read past the end of the stream
* or if invalid data is read.
@ -265,10 +269,10 @@ final class Reader
bufferOffset_ = 0;
}
////load chars in batches of at most 64 bytes
////Load chars in batches of at most 1024 bytes (256 chars)
while(buffer_.length <= bufferOffset_ + length)
{
loadChars(16);
loadChars(256);
if(done)
{
@ -290,10 +294,8 @@ final class Reader
* if nonprintable characters are detected, or
* if there is an error reading from the stream.
*/
void loadChars(in uint chars)
void loadChars(uint chars)
{
const oldLength = buffer_.length;
/**
* Get next character from the stream.
*
@ -369,15 +371,25 @@ final class Reader
}
}
const oldLength = buffer_.length;
const oldPosition = stream_.position;
try
//Preallocating memory to limit GC reallocations.
buffer_.length = buffer_.length + chars;
scope(exit)
{
foreach(i; 0 .. chars)
{
if(done){break;}
const available = stream_.available;
buffer_ ~= getDChar(available);
}
buffer_.length = buffer_.length - chars;
enforce(printable(buffer_[oldLength .. $]),
new ReaderException("Special unicode characters are not allowed"));
}
try for(uint c = 0; chars; --chars, ++c)
{
if(done){break;}
const available = stream_.available;
buffer_[oldLength + c] = getDChar(available);
}
catch(UtfException e)
{
@ -389,9 +401,6 @@ final class Reader
{
throw new ReaderException(e.msg);
}
enforce(printable(buffer_[oldLength .. $]),
new ReaderException("Special unicode characters are not allowed"));
}
/**

View file

@ -22,6 +22,7 @@ import std.string;
import std.typecons;
import std.utf;
import dyaml.escapes;
import dyaml.exception;
import dyaml.queue;
import dyaml.reader;
@ -140,12 +141,16 @@ final class Scanner
///Possible simple keys indexed by flow levels.
SimpleKey[uint] possibleSimpleKeys_;
///Used for constructing strings while limiting reallocation.
Appender!(dchar[]) appender_;
public:
///Construct a Scanner using specified Reader.
this(Reader reader)
{
//Return the next token, but do not delete it from the queue
reader_ = reader;
appender_ = appender!(dchar[])();
fetchStreamStart();
}
@ -156,6 +161,7 @@ final class Scanner
clear(indents_);
indents_ = null;
clear(possibleSimpleKeys_);
clear(appender_);
possibleSimpleKeys_ = null;
reader_ = null;
}
@ -412,8 +418,7 @@ final class Scanner
removePossibleSimpleKey();
allowSimpleKey_ = false;
//There's probably a saner way to clear an associative array than this.
SimpleKey[uint] empty;
possibleSimpleKeys_ = empty;
clear(possibleSimpleKeys_);
tokens_.push(streamEndToken(reader_.mark, reader_.mark));
done_ = true;
@ -1066,15 +1071,15 @@ final class Scanner
dstring lineBreak = "";
//Used to construct the result.
auto appender = appender!string();
//Using appender_, so clear it when we're done.
scope(exit){appender_.clear();}
//Scan the inner part of the block scalar.
while(reader_.column == indent && reader_.peek() != '\0')
{
appender.put(breaks);
appender_.put(breaks);
const bool leadingNonSpace = !" \t".canFind(reader_.peek());
appender.put(scanToNextBreak());
appender_.put(scanToNextBreak());
lineBreak = ""d ~ scanLineBreak();
auto scalarBreaks = scanBlockScalarBreaks(indent);
@ -1089,9 +1094,9 @@ final class Scanner
if(style == ScalarStyle.Folded && lineBreak == "\n" &&
leadingNonSpace && !" \t".canFind(reader_.peek()))
{
if(breaks.length == 0){appender.put(' ');}
if(breaks.length == 0){appender_.put(' ');}
}
else{appender.put(lineBreak);}
else{appender_.put(lineBreak);}
////this is Clark Evans's interpretation (also in the spec
////examples):
//
@ -1099,18 +1104,18 @@ final class Scanner
//{
// if(breaks.length == 0)
// {
// if(!" \t"d.canFind(reader_.peek())){appender.put(' ');}
// if(!" \t"d.canFind(reader_.peek())){appender_.put(' ');}
// else{chunks ~= lineBreak;}
// }
//}
//else{appender.put(lineBreak);}
//else{appender_.put(lineBreak);}
}
else{break;}
}
if(chomping != Chomping.Strip){appender.put(lineBreak);}
if(chomping == Chomping.Keep){appender.put(breaks);}
if(chomping != Chomping.Strip){appender_.put(lineBreak);}
if(chomping == Chomping.Keep){appender_.put(breaks);}
return scalarToken(startMark, endMark, to!string(appender.data), style);
return scalarToken(startMark, endMark, to!string(cast(dstring)appender_.data), style);
}
///Scan chomping and indentation indicators of a scalar token.
@ -1214,45 +1219,25 @@ final class Scanner
const startMark = reader_.mark;
const quote = reader_.get();
auto appender = appender!dstring();
appender.put(scanFlowScalarNonSpaces(quotes, startMark));
//Using appender_, so clear it when we're done.
scope(exit){appender_.clear();}
//Puts scanned data to appender_.
scanFlowScalarNonSpaces(quotes, startMark);
while(reader_.peek() != quote)
{
appender.put(scanFlowScalarSpaces(startMark));
appender.put(scanFlowScalarNonSpaces(quotes, startMark));
//Puts scanned data to appender_.
scanFlowScalarSpaces(startMark);
scanFlowScalarNonSpaces(quotes, startMark);
}
reader_.forward();
return scalarToken(startMark, reader_.mark, to!string(appender.data), quotes);
return scalarToken(startMark, reader_.mark, to!string(cast(dstring)appender_.data), quotes);
}
///Scan nonspace characters in a flow scalar.
dstring scanFlowScalarNonSpaces(ScalarStyle quotes, in Mark startMark)
void scanFlowScalarNonSpaces(ScalarStyle quotes, in Mark startMark)
{
dchar[dchar] escapeReplacements =
['0': '\0',
'a': '\x07',
'b': '\x08',
't': '\x09',
'\t': '\x09',
'n': '\x0A',
'v': '\x0B',
'f': '\x0C',
'r': '\x0D',
'e': '\x1B',
' ': '\x20',
'\"': '\"',
'\\': '\\',
'N': '\u0085',
'_': '\xA0',
'L': '\u2028',
'P': '\u2029'];
uint[dchar] escapeCodes = ['x': 2, 'u': 4, 'U': 8];
//Can't use an Appender due to a Phobos bug, so appending to a string.
dstring result;
for(;;)
{
dchar c = reader_.peek();
@ -1263,33 +1248,33 @@ final class Scanner
c = reader_.peek(length);
}
if(length > 0){result ~= reader_.get(length);}
if(length > 0){appender_.put(reader_.get(length));}
c = reader_.peek();
if(quotes == ScalarStyle.SingleQuoted &&
c == '\'' && reader_.peek(1) == '\'')
{
result ~= '\'';
appender_.put('\'');
reader_.forward(2);
}
else if((quotes == ScalarStyle.DoubleQuoted && c == '\'') ||
(quotes == ScalarStyle.SingleQuoted && "\"\\".canFind(c)))
{
result ~= c;
appender_.put(c);
reader_.forward();
}
else if(quotes == ScalarStyle.DoubleQuoted && c == '\\')
{
reader_.forward();
c = reader_.peek();
if((c in escapeReplacements) !is null)
if((c in dyaml.escapes.fromEscapes) !is null)
{
result ~= escapeReplacements[c];
appender_.put(dyaml.escapes.fromEscapes[c]);
reader_.forward();
}
else if((c in escapeCodes) !is null)
else if((c in dyaml.escapes.escapeHexCodes) !is null)
{
length = escapeCodes[c];
length = dyaml.escapes.escapeHexCodes[c];
reader_.forward();
foreach(i; 0 .. length)
@ -1303,12 +1288,12 @@ final class Scanner
}
dstring hex = reader_.get(length);
result ~= cast(dchar)parse!int(hex, 16);
appender_.put(cast(dchar)parse!int(hex, 16));
}
else if("\n\r\u0085\u2028\u2029".canFind(c))
{
scanLineBreak();
result ~= scanFlowScalarBreaks(startMark);
appender_.put(scanFlowScalarBreaks(startMark));
}
else
{
@ -1318,12 +1303,15 @@ final class Scanner
to!string(c), reader_.mark);
}
}
else{return result;}
else
{
return;
}
}
}
///Scan space characters in a flow scalar.
dstring scanFlowScalarSpaces(in Mark startMark)
void scanFlowScalarSpaces(in Mark startMark)
{
uint length = 0;
while(" \t".canFind(reader_.peek(length))){++length;}
@ -1334,18 +1322,16 @@ final class Scanner
new Error("While scanning a quoted scalar", startMark,
"found unexpected end of stream", reader_.mark));
auto appender = appender!dstring();
if("\n\r\u0085\u2028\u2029".canFind(c))
{
const lineBreak = scanLineBreak();
const breaks = scanFlowScalarBreaks(startMark);
if(lineBreak != '\n'){appender.put(lineBreak);}
else if(breaks.length == 0){appender.put(' ');}
appender.put(breaks);
if(lineBreak != '\n'){appender_.put(lineBreak);}
else if(breaks.length == 0){appender_.put(' ');}
appender_.put(breaks);
}
else{appender.put(whitespaces);}
return appender.data;
else{appender_.put(whitespaces);}
}
///Scan line breaks in a flow scalar.
@ -1378,7 +1364,8 @@ final class Scanner
{
//We keep track of the allowSimpleKey_ flag here.
//Indentation rules are loosened for the flow context
auto appender = appender!dstring();
//Using appender_, so clear it when we're done.
scope(exit){appender_.clear();}
const startMark = reader_.mark;
Mark endMark = startMark;
const indent = indent_ + 1;
@ -1420,8 +1407,8 @@ final class Scanner
if(length == 0){break;}
allowSimpleKey_ = false;
appender.put(spaces);
appender.put(reader_.get(length));
appender_.put(spaces);
appender_.put(reader_.get(length));
endMark = reader_.mark;
@ -1432,7 +1419,7 @@ final class Scanner
break;
}
}
return scalarToken(startMark, endMark, to!string(appender.data), ScalarStyle.Plain);
return scalarToken(startMark, endMark, to!string(cast(dstring)appender_.data), ScalarStyle.Plain);
}
///Scan spaces in a plain scalar.
@ -1516,7 +1503,8 @@ final class Scanner
dstring scanTagURI(string name, in Mark startMark)
{
//Note: we do not check if URI is well-formed.
auto appender = appender!dstring();
//Using appender_, so clear it when we're done.
scope(exit){appender_.clear();}
uint length = 0;
dchar c = reader_.peek();
@ -1524,23 +1512,23 @@ final class Scanner
{
if(c == '%')
{
appender.put(reader_.get(length));
appender_.put(reader_.get(length));
length = 0;
appender.put(scanURIEscapes(name, startMark));
appender_.put(scanURIEscapes(name, startMark));
}
else{++length;}
c = reader_.peek(length);
}
if(length > 0)
{
appender.put(reader_.get(length));
appender_.put(reader_.get(length));
length = 0;
}
enforce(appender.data.length > 0,
enforce(appender_.data.length > 0,
new Error("While parsing a " ~ name, startMark,
"expected URI, but found: " ~ to!string(c), reader_.mark));
return appender.data;
return cast(dstring)appender_.data;
}
///Scan URI escape sequences.
@ -1606,8 +1594,7 @@ final class Scanner
{
const c = reader_.peek();
dchar[] plainLineBreaks = ['\r', '\n', '\u0085'];
if(plainLineBreaks.canFind(c))
if("\r\n\u0085".canFind(c))
{
if(reader_.prefix(2) == "\r\n"){reader_.forward(2);}
else{reader_.forward();}

View file

@ -13,6 +13,7 @@ module dyaml.serializer;
import std.array;
import std.format;
import std.typecons;
import dyaml.anchor;
import dyaml.emitter;
@ -193,12 +194,13 @@ struct Serializer
{
assert(node.isType!string, "Scalar node type must be string before serialized");
auto value = node.as!string;
Tag detectedTag = resolver_.resolve(NodeID.Scalar, Tag(null), value, true);
Tag defaultTag = resolver_.resolve(NodeID.Scalar, Tag(null), value, false);
const Tag detectedTag = resolver_.resolve(NodeID.Scalar, Tag(null), value, true);
const Tag defaultTag = resolver_.resolve(NodeID.Scalar, Tag(null), value, false);
bool isDetected = node.tag_ == detectedTag;
bool isDefault = node.tag_ == defaultTag;
emitter_.emit(scalarEvent(Mark(), Mark(), aliased, node.tag_,
[node.tag_ == detectedTag, node.tag_ == defaultTag],
value, ScalarStyle.Invalid));
tuple(isDetected, isDefault), value, ScalarStyle.Invalid));
return;
}
if(node.isSequence)

View file

@ -10,6 +10,7 @@ module dyaml.testemitter;
import std.algorithm;
import std.file;
import std.range;
import std.typecons;
import dyaml.dumper;
import dyaml.event;
@ -163,7 +164,7 @@ void testEmitterStyles(bool verbose, string dataFilename, string canonicalFilena
if(event.id == EventID.Scalar)
{
event = scalarEvent(Mark(), Mark(), event.anchor, event.tag,
[event.implicit, event.implicit_2],
tuple(event.implicit, event.implicit_2),
event.value, style);
}
else if(event.id == EventID.SequenceStart)