Reader was reimplemented.

After experiments with loading the whole file at once, and
with decoding and parsing in a separate thread, a lazy reader
turned out to be the fastest and least memory-intensive solution.
Characters are now decoded in small batches.
This improved parsing speed by ~20%.

No global state anymore. Anchors are now zero-terminated strings
and TagDirectives are a simple array. The Event structure was changed
to prevent a size increase.
Minor fixes and improvements.
This commit is contained in:
Ferdinand Majerech 2011-11-16 03:10:29 +01:00
parent f95f0d14c8
commit 2c9d464389
28 changed files with 707 additions and 618 deletions

Binary file not shown.

View file

@ -112,57 +112,57 @@
<dd><p>Set stream name. Used in debugging messages.</p>
</dd>
<dt class="d_decl">void <a name="resolver"></a><span class="ddoc_psymbol">resolver</span>(Resolver <a name="resolver"></a><span class="ddoc_psymbol">resolver</span>);
<dt class="d_decl">@property void <a name="resolver"></a><span class="ddoc_psymbol">resolver</span>(Resolver <a name="resolver"></a><span class="ddoc_psymbol">resolver</span>);
</dt>
<dd><p>Specify custom Resolver to use.</p>
</dd>
<dt class="d_decl">void <a name="representer"></a><span class="ddoc_psymbol">representer</span>(Representer <a name="representer"></a><span class="ddoc_psymbol">representer</span>);
<dt class="d_decl">@property void <a name="representer"></a><span class="ddoc_psymbol">representer</span>(Representer <a name="representer"></a><span class="ddoc_psymbol">representer</span>);
</dt>
<dd><p>Specify custom Representer to use.</p>
</dd>
<dt class="d_decl">void <a name="canonical"></a><span class="ddoc_psymbol">canonical</span>(in bool <a name="canonical"></a><span class="ddoc_psymbol">canonical</span>);
<dt class="d_decl">@property void <a name="canonical"></a><span class="ddoc_psymbol">canonical</span>(bool <a name="canonical"></a><span class="ddoc_psymbol">canonical</span>);
</dt>
<dd><p>Write scalars in canonical form?</p>
</dd>
<dt class="d_decl">void <a name="indent"></a><span class="ddoc_psymbol">indent</span>(in uint <a name="indent"></a><span class="ddoc_psymbol">indent</span>);
<dt class="d_decl">@property void <a name="indent"></a><span class="ddoc_psymbol">indent</span>(uint <a name="indent"></a><span class="ddoc_psymbol">indent</span>);
</dt>
<dd><p>Set indentation width. 2 by default. Must not be zero.</p>
</dd>
<dt class="d_decl">void <a name="textWidth"></a><span class="ddoc_psymbol">textWidth</span>(in uint <b>width</b>);
<dt class="d_decl">@property void <a name="textWidth"></a><span class="ddoc_psymbol">textWidth</span>(uint <b>width</b>);
</dt>
<dd><p>Set preferred text width.</p>
</dd>
<dt class="d_decl">void <a name="lineBreak"></a><span class="ddoc_psymbol">lineBreak</span>(in LineBreak <a name="lineBreak"></a><span class="ddoc_psymbol">lineBreak</span>);
<dt class="d_decl">@property void <a name="lineBreak"></a><span class="ddoc_psymbol">lineBreak</span>(LineBreak <a name="lineBreak"></a><span class="ddoc_psymbol">lineBreak</span>);
</dt>
<dd><p>Set line break to use. Unix by default.</p>
</dd>
<dt class="d_decl">void <a name="encoding"></a><span class="ddoc_psymbol">encoding</span>(in Encoding <a name="encoding"></a><span class="ddoc_psymbol">encoding</span>);
<dt class="d_decl">@property void <a name="encoding"></a><span class="ddoc_psymbol">encoding</span>(Encoding <a name="encoding"></a><span class="ddoc_psymbol">encoding</span>);
</dt>
<dd><p>Set character encoding to use. UTF-8 by default.</p>
</dd>
<dt class="d_decl">void <a name="explicitStart"></a><span class="ddoc_psymbol">explicitStart</span>(in bool <b>explicit</b>);
<dt class="d_decl">@property void <a name="explicitStart"></a><span class="ddoc_psymbol">explicitStart</span>(bool <b>explicit</b>);
</dt>
<dd><p>Always explicitly write document start?</p>
</dd>
<dt class="d_decl">void <a name="explicitEnd"></a><span class="ddoc_psymbol">explicitEnd</span>(in bool <b>explicit</b>);
<dt class="d_decl">@property void <a name="explicitEnd"></a><span class="ddoc_psymbol">explicitEnd</span>(bool <b>explicit</b>);
</dt>
<dd><p>Always explicitly write document end?</p>
</dd>
<dt class="d_decl">void <a name="YAMLVersion"></a><span class="ddoc_psymbol">YAMLVersion</span>(in string <a name="YAMLVersion"></a><span class="ddoc_psymbol">YAMLVersion</span>);
<dt class="d_decl">@property void <a name="YAMLVersion"></a><span class="ddoc_psymbol">YAMLVersion</span>(string <a name="YAMLVersion"></a><span class="ddoc_psymbol">YAMLVersion</span>);
</dt>
<dd><p>Specify YAML version string. "1.1" by default.</p>
</dd>
<dt class="d_decl">void <a name="tagDirectives"></a><span class="ddoc_psymbol">tagDirectives</span>(string[string] <b>tags</b>);
<dt class="d_decl">@property void <a name="tagDirectives"></a><span class="ddoc_psymbol">tagDirectives</span>(string[string] <b>tags</b>);
</dt>
<dd><p>Specify tag directives.
</p>

View file

@ -98,11 +98,11 @@
</pre>
</div>
<dl><dt class="d_decl">this(in const(immutable(char)[]) <b>filename</b>);
<dl><dt class="d_decl">this(string <b>filename</b>);
</dt>
<dd><p>Construct a Loader to load YAML from a file.
</p>
<b>Parameters:</b><div class="pbr"><table class=parms><tr><td valign=top>const(immutable(char)[]) <b>filename</b></td>
<b>Parameters:</b><div class="pbr"><table class=parms><tr><td valign=top>string <b>filename</b></td>
<td valign=top>Name of the file to load from.</td></tr>
</table></div>
<b>Throws:</b><div class="pbr">YAMLException if the file could not be opened or read.</div>
@ -138,6 +138,9 @@
<dd><p>Load single YAML document.
</p>
<p>If none or more than one YAML document is found, this throws a YAMLException.
<br>
This can only be called once; this is enforced by contract.
</p>
<b>Returns:</b><div class="pbr">Root node of the document.
@ -155,6 +158,9 @@
them all at once. Calling <a name="loadAll"></a><span class="ddoc_psymbol">loadAll</span> after iterating over the node or
vice versa will not return any documents, as they have all been parsed
already.
<br>
This can only be called once; this is enforced by contract.
</p>
<b>Returns:</b><div class="pbr">Array of root nodes of all documents in the file/stream.
@ -168,6 +174,9 @@
<dd><p>Foreach over YAML documents.
</p>
<p>Parses documents lazily, when they are needed.
<br>
Foreach over a Loader can only be used once; this is enforced by contract.
</p>
<b>Throws:</b><div class="pbr">YAMLException on a parsing error.</div>

View file

@ -182,7 +182,7 @@
</div>
</dd>
<dt class="d_decl">Node <a name="representScalar"></a><span class="ddoc_psymbol">representScalar</span>(in string <b>tag</b>, string <b>scalar</b>, ScalarStyle <b>style</b> = (ScalarStyle).Invalid);
<dt class="d_decl">Node <a name="representScalar"></a><span class="ddoc_psymbol">representScalar</span>(string <b>tag</b>, string <b>scalar</b>, ScalarStyle <b>style</b> = (ScalarStyle).Invalid);
</dt>
<dd><p>Represent a scalar with specified tag.
</p>
@ -217,7 +217,7 @@
</p>
</dd>
<dt class="d_decl">Node <a name="representSequence"></a><span class="ddoc_psymbol">representSequence</span>(in string <b>tag</b>, Node[] <b>sequence</b>, CollectionStyle <b>style</b> = (CollectionStyle).Invalid);
<dt class="d_decl">Node <a name="representSequence"></a><span class="ddoc_psymbol">representSequence</span>(string <b>tag</b>, Node[] <b>sequence</b>, CollectionStyle <b>style</b> = (CollectionStyle).Invalid);
</dt>
<dd><p>Represent a sequence with specified tag, representing children first.
</p>
@ -256,7 +256,7 @@
</p>
</dd>
<dt class="d_decl">Node <a name="representMapping"></a><span class="ddoc_psymbol">representMapping</span>(in string <b>tag</b>, Pair[] <b>pairs</b>, CollectionStyle <b>style</b> = (CollectionStyle).Invalid);
<dt class="d_decl">Node <a name="representMapping"></a><span class="ddoc_psymbol">representMapping</span>(string <b>tag</b>, Pair[] <b>pairs</b>, CollectionStyle <b>style</b> = (CollectionStyle).Invalid);
</dt>
<dd><p>Represent a mapping with specified tag, representing children first.
</p>

View file

@ -138,7 +138,7 @@ struct appears in Phobos.</p>
</div>
<div class="footer">
&copy; Copyright 2011, Ferdinand Majerech. Based on PyYAML http://www.pyyaml.org by Kirill Simonov.
Last updated on Oct 30, 2011.
Last updated on Nov 15, 2011.
Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 1.0.7.
</div>
</body>

View file

@ -104,7 +104,7 @@
</div>
<div class="footer">
&copy; Copyright 2011, Ferdinand Majerech. Based on PyYAML http://www.pyyaml.org by Kirill Simonov.
Last updated on Oct 30, 2011.
Last updated on Nov 15, 2011.
Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 1.0.7.
</div>
</body>

View file

@ -87,7 +87,7 @@
</div>
<div class="footer">
&copy; Copyright 2011, Ferdinand Majerech. Based on PyYAML http://www.pyyaml.org by Kirill Simonov.
Last updated on Oct 30, 2011.
Last updated on Nov 15, 2011.
Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 1.0.7.
</div>
</body>

View file

@ -368,7 +368,7 @@ directory of the D:YAML package.</p>
</div>
<div class="footer">
&copy; Copyright 2011, Ferdinand Majerech. Based on PyYAML http://www.pyyaml.org by Kirill Simonov.
Last updated on Oct 30, 2011.
Last updated on Nov 15, 2011.
Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 1.0.7.
</div>
</body>

View file

@ -237,7 +237,7 @@ example in the <tt class="docutils literal"><span class="pre">example/getting_st
</div>
<div class="footer">
&copy; Copyright 2011, Ferdinand Majerech. Based on PyYAML http://www.pyyaml.org by Kirill Simonov.
Last updated on Oct 30, 2011.
Last updated on Nov 15, 2011.
Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 1.0.7.
</div>
</body>

View file

@ -330,7 +330,7 @@ Some of these might change in the future (especially !!map and !!set).</p>
</div>
<div class="footer">
&copy; Copyright 2011, Ferdinand Majerech. Based on PyYAML http://www.pyyaml.org by Kirill Simonov.
Last updated on Oct 30, 2011.
Last updated on Nov 15, 2011.
Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 1.0.7.
</div>
</body>

View file

@ -7,24 +7,7 @@
///YAML anchor.
module dyaml.anchor;
import dyaml.sharedobject;
import dyaml.zerostring;
///YAML anchor (reference) struct. Encapsulates an anchor to save memory.
struct Anchor
{
public:
mixin SharedObject!(string, Anchor);
///Construct an anchor from a string representation.
this(string anchor)
{
if(anchor is null || anchor == "")
{
index_ = uint.max;
return;
}
add(anchor);
}
}
alias ZeroString!"Anchor" Anchor;

View file

@ -25,7 +25,7 @@ import dyaml.node;
import dyaml.representer;
import dyaml.resolver;
import dyaml.serializer;
import dyaml.tagdirectives;
import dyaml.tagdirective;
/**
@ -146,7 +146,7 @@ struct Dumper
///YAML version string.
string YAMLVersion_ = "1.1";
///Tag directives to use.
TagDirectives tags_ = TagDirectives();
TagDirective[] tags_ = null;
///Always write document start?
bool explicitStart_ = false;
///Always write document end?
@ -167,7 +167,7 @@ struct Dumper
*
* Throws: YAMLException if the file can not be dumped to (e.g. cannot be opened).
*/
this(in string filename)
this(string filename)
{
name_ = filename;
try{this(new File(filename, FileMode.OutNew));}
@ -184,20 +184,16 @@ struct Dumper
resolver_ = defaultResolver_;
representer_ = defaultRepresenter_;
stream_ = stream;
Anchor.addReference();
TagDirectives.addReference();
}
///Destroy the Dumper.
~this()
{
Anchor.removeReference();
TagDirectives.removeReference();
YAMLVersion_ = null;
}
///Set stream _name. Used in debugging messages.
@property void name(in string name)
@property void name(string name)
{
name_ = name;
}
@ -217,13 +213,13 @@ struct Dumper
}
///Write scalars in _canonical form?
@property void canonical(in bool canonical)
@property void canonical(bool canonical)
{
canonical_ = canonical;
}
///Set indentation width. 2 by default. Must not be zero.
@property void indent(in uint indent)
@property void indent(uint indent)
in
{
assert(indent != 0, "Can't use zero YAML indent width");
@ -234,37 +230,37 @@ struct Dumper
}
///Set preferred text _width.
@property void textWidth(in uint width)
@property void textWidth(uint width)
{
textWidth_ = width;
}
///Set line break to use. Unix by default.
@property void lineBreak(in LineBreak lineBreak)
@property void lineBreak(LineBreak lineBreak)
{
lineBreak_ = lineBreak;
}
///Set character _encoding to use. UTF-8 by default.
@property void encoding(in Encoding encoding)
@property void encoding(Encoding encoding)
{
encoding_ = encoding;
}
///Always explicitly write document start?
@property void explicitStart(in bool explicit)
@property void explicitStart(bool explicit)
{
explicitStart_ = explicit;
}
///Always explicitly write document end?
@property void explicitEnd(in bool explicit)
@property void explicitEnd(bool explicit)
{
explicitEnd_ = explicit;
}
///Specify YAML version string. "1.1" by default.
@property void YAMLVersion(in string YAMLVersion)
@property void YAMLVersion(string YAMLVersion)
{
YAMLVersion_ = YAMLVersion;
}
@ -301,16 +297,16 @@ struct Dumper
*/
@property void tagDirectives(string[string] tags)
{
tagDirective[] t;
TagDirective[] t;
foreach(handle, prefix; tags)
{
assert(handle.length >= 1 && handle[0] == '!' && handle[$ - 1] == '!',
"A tag handle is empty or does not start and end with a "
"'!' character : " ~ handle);
assert(prefix.length >= 1, "A tag prefix is empty");
t ~= tagDirective(handle, prefix);
t ~= TagDirective(handle, prefix);
}
tags_ = TagDirectives(t);
tags_ = t;
}
/**
@ -352,7 +348,7 @@ struct Dumper
*
* Throws: YAMLException if unable to emit.
*/
void emit(in Event[] events)
void emit(Event[] events)
{
try
{

View file

@ -71,11 +71,11 @@ private mixin FastCharSearch!"\n\u0085\u2028\u2029"d newlineSearch_;
struct Emitter
{
private:
alias dyaml.tagdirectives.tagDirective tagDirective;
alias dyaml.tagdirective.TagDirective TagDirective;
///Default tag handle shortcuts and replacements.
static tagDirective[] defaultTagDirectives_ =
[tagDirective("!", "!"), tagDirective("!!", "tag:yaml.org,2002:")];
static TagDirective[] defaultTagDirectives_ =
[TagDirective("!", "!"), TagDirective("!!", "tag:yaml.org,2002:")];
///Stream to write to.
Stream stream_;
@ -135,7 +135,7 @@ struct Emitter
LineBreak bestLineBreak_;
///Tag directive handle - prefix pairs.
tagDirective[] tagDirectives_;
TagDirective[] tagDirectives_;
///Anchor/alias to process.
string preparedAnchor_ = null;
@ -193,7 +193,7 @@ struct Emitter
}
///Emit an event. Throws EmitterException on error.
void emit(immutable Event event)
void emit(Event event)
{
events_.push(event);
while(!needMoreEvents())
@ -254,7 +254,7 @@ struct Emitter
{
if(events_.length == 0){return true;}
immutable event = events_.peek();
immutable event = cast(immutable Event)events_.peek();
if(event.id == EventID.DocumentStart){return needEvents(1);}
if(event.id == EventID.SequenceStart){return needEvents(2);}
if(event.id == EventID.MappingStart) {return needEvents(3);}
@ -274,7 +274,7 @@ struct Emitter
events_.next();
while(!events_.iterationOver())
{
immutable event = events_.next();
immutable event = cast(immutable Event)events_.next();
static starts = [EventID.DocumentStart, EventID.SequenceStart, EventID.MappingStart];
static ends = [EventID.DocumentEnd, EventID.SequenceEnd, EventID.MappingEnd];
if(starts.canFind(event.id)) {++level;}
@ -347,8 +347,8 @@ struct Emitter
if(event_.id == EventID.DocumentStart)
{
const YAMLVersion = event_.value;
const tagDirectives = event_.tagDirectives;
if(openEnded_ && (YAMLVersion !is null || !tagDirectives.isNull()))
auto tagDirectives = event_.tagDirectives;
if(openEnded_ && (YAMLVersion !is null || tagDirectives !is null))
{
writeIndicator("...", true);
writeIndent();
@ -359,10 +359,10 @@ struct Emitter
writeVersionDirective(prepareVersion(YAMLVersion));
}
if(!tagDirectives.isNull())
if(tagDirectives !is null)
{
tagDirectives_ = tagDirectives.get;
sort!"icmp(a[0], b[0]) < 0"(tagDirectives_);
tagDirectives_ = tagDirectives;
sort!"icmp(a.handle, b.handle) < 0"(tagDirectives_);
foreach(ref pair; tagDirectives_)
{
@ -371,7 +371,7 @@ struct Emitter
}
}
bool eq(ref tagDirective a, ref tagDirective b){return a.handle == b.handle;}
bool eq(ref TagDirective a, ref TagDirective b){return a.handle == b.handle;}
//Add any default tag directives that have not been overriden.
foreach(ref def; defaultTagDirectives_)
{
@ -382,7 +382,7 @@ struct Emitter
}
const implicit = first && !event_.explicitDocument && !canonical_ &&
YAMLVersion is null && tagDirectives.isNull() &&
YAMLVersion is null && tagDirectives is null &&
!checkEmptyDocument();
if(!implicit)
{
@ -684,7 +684,7 @@ struct Emitter
return false;
}
immutable event = events_.peek();
immutable event = cast(immutable Event)events_.peek();
const emptyScalar = event.id == EventID.Scalar && event.anchor.isNull() &&
event.tag.isNull() && event.implicit && event.value == "";
return emptyScalar;
@ -933,14 +933,14 @@ struct Emitter
string suffix = tagString;
//Sort lexicographically by prefix.
sort!"icmp(a[1], b[1]) < 0"(tagDirectives_);
sort!"icmp(a.prefix, b.prefix) < 0"(tagDirectives_);
foreach(ref pair; tagDirectives_)
{
auto prefix = pair[1];
auto prefix = pair.prefix;
if(tagString.startsWith(prefix) &&
(prefix != "!" || prefix.length < tagString.length))
{
handle = pair[0];
handle = pair.handle;
suffix = tagString[prefix.length .. $];
}
}

View file

@ -19,7 +19,7 @@ import dyaml.encoding;
import dyaml.exception;
import dyaml.reader;
import dyaml.tag;
import dyaml.tagdirectives;
import dyaml.tagdirective;
import dyaml.style;
@ -55,30 +55,40 @@ struct Event
Mark startMark;
///End position of the event in file/stream.
Mark endMark;
union
{
struct
{
///Anchor of the event, if any.
Anchor anchor;
///Tag of the event, if any.
Tag tag;
}
///Tag directives, if this is a DocumentStart.
//TagDirectives tagDirectives;
TagDirective[] tagDirectives;
}
///Event type.
EventID id = EventID.Invalid;
///Style of scalar event, if this is a scalar event.
ScalarStyle scalarStyle;
ScalarStyle scalarStyle = ScalarStyle.Invalid;
union
{
///Should the tag be implicitly resolved?
bool implicit;
///TODO figure this out - Unknown, used by PyYAML with Scalar events.
bool implicit_2;
/**
* Is this document event explicit?
*
* Used if this is a DocumentStart or DocumentEnd.
*/
alias implicit explicitDocument;
///Tag directives, if this is a DocumentStart.
TagDirectives tagDirectives;
bool explicitDocument;
}
///TODO figure this out - Unknown, used by PyYAML with Scalar events.
bool implicit_2;
///Encoding of the stream, if this is a StreamStart.
Encoding encoding;
///Collection style, if this is a SequenceStart or MappingStart.
CollectionStyle collectionStyle;
CollectionStyle collectionStyle = CollectionStyle.Invalid;
///Is this a null (uninitialized) event?
@property bool isNull() const {return id == EventID.Invalid;}
@ -96,7 +106,12 @@ struct Event
*/
Event event(EventID id)(in Mark start, in Mark end, in Anchor anchor = Anchor()) pure
{
return Event(null, start, end, anchor, Tag(), id);
Event result;
result.startMark = start;
result.endMark = end;
result.anchor = anchor;
result.id = id;
return result;
}
/**
@ -114,8 +129,15 @@ Event collectionStartEvent(EventID id)(in Mark start, in Mark end, in Anchor anc
{
static assert(id == EventID.SequenceStart || id == EventID.SequenceEnd ||
id == EventID.MappingStart || id == EventID.MappingEnd);
return Event(null, start, end, anchor, tag, id, ScalarStyle.Invalid, implicit,
false, TagDirectives(), Encoding.UTF_8, style);
Event result;
result.startMark = start;
result.endMark = end;
result.anchor = anchor;
result.tag = tag;
result.id = id;
result.implicit = implicit;
result.collectionStyle = style;
return result;
}
/**
@ -127,8 +149,12 @@ Event collectionStartEvent(EventID id)(in Mark start, in Mark end, in Anchor anc
*/
Event streamStartEvent(in Mark start, in Mark end, in Encoding encoding) pure
{
return Event(null, start, end, Anchor(), Tag(), EventID.StreamStart,
ScalarStyle.Invalid, false, false, TagDirectives(), encoding);
Event result;
result.startMark = start;
result.endMark = end;
result.id = EventID.StreamStart;
result.encoding = encoding;
return result;
}
///Aliases for simple events.
@ -151,10 +177,16 @@ alias collectionStartEvent!(EventID.MappingStart) mappingStartEvent;
* tagDirectives = Tag directives of the document.
*/
Event documentStartEvent(in Mark start, in Mark end, bool explicit, string YAMLVersion,
in TagDirectives tagDirectives) pure
TagDirective[] tagDirectives) pure
{
return Event(YAMLVersion, start, end, Anchor(), Tag(), EventID.DocumentStart,
ScalarStyle.Invalid, explicit, false, tagDirectives);
Event result;
result.value = YAMLVersion;
result.startMark = start;
result.endMark = end;
result.id = EventID.DocumentStart;
result.explicitDocument = explicit;
result.tagDirectives = tagDirectives;
return result;
}
/**
@ -166,8 +198,12 @@ Event documentStartEvent(in Mark start, in Mark end, bool explicit, string YAMLV
*/
Event documentEndEvent(in Mark start, in Mark end, bool explicit) pure
{
return Event(null, start, end, Anchor(), Tag(), EventID.DocumentEnd,
ScalarStyle.Invalid, explicit);
Event result;
result.startMark = start;
result.endMark = end;
result.id = EventID.DocumentEnd;
result.explicitDocument = explicit;
return result;
}
/**
@ -185,6 +221,15 @@ Event scalarEvent(in Mark start, in Mark end, in Anchor anchor, in Tag tag,
in Tuple!(bool, bool) implicit, in string value,
in ScalarStyle style = ScalarStyle.Invalid) pure
{
return Event(value, start, end, anchor, tag, EventID.Scalar, style, implicit[0],
implicit[1]);
Event result;
result.value = value;
result.startMark = start;
result.endMark = end;
result.anchor = anchor;
result.tag = tag;
result.id = EventID.Scalar;
result.scalarStyle = style;
result.implicit = implicit[0];
result.implicit_2 = implicit[1];
return result;
}

View file

@ -13,7 +13,6 @@ module dyaml.loader;
import std.exception;
import std.stream;
import dyaml.anchor;
import dyaml.composer;
import dyaml.constructor;
import dyaml.event;
@ -23,7 +22,6 @@ import dyaml.parser;
import dyaml.reader;
import dyaml.resolver;
import dyaml.scanner;
import dyaml.tagdirectives;
import dyaml.token;
@ -114,6 +112,8 @@ struct Loader
Constructor constructor_;
///Name of the input file or stream, used in error messages.
string name_ = "<unknown>";
///Are we done loading?
bool done_ = false;
public:
@disable this();
@ -127,7 +127,7 @@ struct Loader
*
* Throws: YAMLException if the file could not be opened or read.
*/
this(in string filename)
this(string filename)
{
name_ = filename;
try{this(new File(filename));}
@ -154,8 +154,6 @@ struct Loader
parser_ = new Parser(scanner_);
resolver_ = defaultResolver_;
constructor_ = defaultConstructor_;
Anchor.addReference();
TagDirectives.addReference();
}
catch(YAMLException e)
{
@ -167,8 +165,6 @@ struct Loader
///Destroy the Loader.
~this()
{
Anchor.removeReference();
TagDirectives.removeReference();
clear(reader_);
clear(scanner_);
clear(parser_);
@ -197,15 +193,23 @@ struct Loader
*
* If none or more than one YAML document is found, this throws a YAMLException.
*
* This can only be called once; this is enforced by contract.
*
* Returns: Root node of the document.
*
* Throws: YAMLException if there wasn't exactly one document
* or on a YAML parsing error.
*/
Node load()
in
{
assert(!done_, "Loader: Trying to load YAML twice");
}
body
{
try
{
scope(exit){done_ = true;}
auto composer = new Composer(parser_, resolver_, constructor_);
enforce(composer.checkNode(), new YAMLException("No YAML document to load"));
return composer.getSingleNode();
@ -225,6 +229,8 @@ struct Loader
* vice versa will not return any documents, as they have all been parsed
* already.
*
* This can only be called once; this is enforced by contract.
*
* Returns: Array of root nodes of all documents in the file/stream.
*
* Throws: YAMLException on a parsing error.
@ -241,10 +247,18 @@ struct Loader
*
* Parses documents lazily, when they are needed.
*
* Foreach over a Loader can only be used once; this is enforced by contract.
*
* Throws: YAMLException on a parsing error.
*/
int opApply(int delegate(ref Node) dg)
in
{
assert(!done_, "Loader: Trying to load YAML twice");
}
body
{
scope(exit){done_ = true;}
try
{
auto composer = new Composer(parser_, resolver_, constructor_);
@ -284,11 +298,11 @@ struct Loader
}
//Parse and return all events. Used for debugging.
Event[] parse()
immutable(Event)[] parse()
{
try
{
Event[] result;
immutable(Event)[] result;
while(parser_.checkEvent()){result ~= parser_.getEvent();}
return result;
}

View file

@ -24,7 +24,7 @@ import dyaml.scanner;
import dyaml.style;
import dyaml.token;
import dyaml.tag;
import dyaml.tagdirectives;
import dyaml.tagdirective;
package:
@ -108,10 +108,10 @@ final class Parser
{
private:
///Default tag handle shortcuts and replacements.
static tagDirective[] defaultTagDirectives_;
static TagDirective[] defaultTagDirectives_;
static this()
{
defaultTagDirectives_ = [tagDirective("!", "!"), tagDirective("!!", "tag:yaml.org,2002:")];
defaultTagDirectives_ = [TagDirective("!", "!"), TagDirective("!!", "tag:yaml.org,2002:")];
}
///Scanner providing YAML tokens.
@ -123,7 +123,7 @@ final class Parser
///YAML version string.
string YAMLVersion_ = null;
///Tag handle shortcuts and replacements.
tagDirective[] tagDirectives_;
TagDirective[] tagDirectives_;
///Stack of states.
Array!(Event delegate()) states_;
@ -193,13 +193,13 @@ final class Parser
*
* Must not be called if there are no events left.
*/
Event peekEvent()
immutable(Event) peekEvent()
{
if(currentEvent_.isNull && state_ !is null)
{
currentEvent_ = state_();
}
if(!currentEvent_.isNull){return currentEvent_;}
if(!currentEvent_.isNull){return cast(immutable Event)currentEvent_;}
assert(false, "No event left to peek");
}
@ -208,7 +208,7 @@ final class Parser
*
* Must not be called if there are no events left.
*/
Event getEvent()
immutable(Event) getEvent()
{
//Get the next event and proceed further.
if(currentEvent_.isNull && state_ !is null)
@ -218,7 +218,7 @@ final class Parser
if(!currentEvent_.isNull)
{
immutable Event result = currentEvent_;
immutable Event result = cast(immutable Event)currentEvent_;
currentEvent_.id = EventID.Invalid;
return result;
}
@ -273,7 +273,7 @@ final class Parser
states_ ~= &parseDocumentEnd;
state_ = &parseBlockNode;
return documentStartEvent(token.startMark, token.endMark, false, null, TagDirectives());
return documentStartEvent(token.startMark, token.endMark, false, null, null);
}
return parseDocumentStart();
}
@ -336,7 +336,7 @@ final class Parser
}
///Process directives at the beginning of a document.
TagDirectives processDirectives()
TagDirective[] processDirectives()
{
//Destroy version and tag handles from previous document.
YAMLVersion_ = null;
@ -367,21 +367,21 @@ final class Parser
foreach(ref pair; tagDirectives_)
{
//handle
const h = pair[0];
const h = pair.handle;
enforce(h != handle, new Error("Duplicate tag handle: " ~ handle,
token.startMark));
}
tagDirectives_ ~= tagDirective(handle, parts[2]);
tagDirectives_ ~= TagDirective(handle, parts[2]);
}
}
TagDirectives value = tagDirectives_.length == 0 ? TagDirectives() : TagDirectives(tagDirectives_);
TagDirective[] value = tagDirectives_;
//Add any default tag handles that haven't been overridden.
foreach(ref defaultPair; defaultTagDirectives_)
{
bool found = false;
foreach(ref pair; tagDirectives_) if(defaultPair[0] == pair[0])
foreach(ref pair; tagDirectives_) if(defaultPair.handle == pair.handle)
{
found = true;
break;
@ -540,10 +540,9 @@ final class Parser
string replacement = null;
foreach(ref pair; tagDirectives_)
{
//pair[0] is handle, pair[1] replacement.
if(pair[0] == handle)
if(pair.handle == handle)
{
replacement = pair[1];
replacement = pair.prefix;
break;
}
}

View file

@ -37,7 +37,7 @@ struct Queue(T)
///Linked list node containing one element and pointer to the next node.
struct Node
{
T payload_ = T.init;
T payload_;
Node* next_ = null;
}
@ -90,7 +90,7 @@ struct Queue(T)
}
///Push new item to the queue.
void push(in T item)
void push(T item)
{
Node* newLast = allocate!Node(item, cast(Node*)null);
if(last_ !is null){last_.next_ = newLast;}
@ -100,7 +100,7 @@ struct Queue(T)
}
///Insert a new item putting it to specified index in the linked list.
void insert(in T item, in size_t idx)
void insert(T item, in size_t idx)
in
{
assert(idx <= length_);
@ -155,7 +155,7 @@ struct Queue(T)
}
///Return the next element in the queue.
ref const(T) peek() const
ref inout(T) peek() inout
in
{
assert(!empty, "Trying to peek at an element in an empty queue");

View file

@ -7,7 +7,9 @@
module dyaml.reader;
import core.stdc.stdlib;
import core.stdc.string;
import core.thread;
import std.algorithm;
import std.conv;
@ -34,47 +36,30 @@ class ReaderException : YAMLException
}
}
///Reads data from a stream and converts it to UTF-32 (dchar) data.
///Lazily reads and decodes data from stream, only storing as much as needed at any moment.
final class Reader
{
private:
///Input stream.
//Input stream.
EndianStream stream_;
///Allocated space for buffer_.
dchar[] bufferAllocated_;
///Buffer of currently loaded characters.
dchar[] buffer_;
///Current position within buffer. Only data after this position can be read.
//Allocated space for buffer_.
dchar[] bufferAllocated_ = null;
//Buffer of currently loaded characters.
dchar[] buffer_ = null;
//Current position within buffer. Only data after this position can be read.
uint bufferOffset_ = 0;
///Index of the current character in the stream.
//Index of the current character in the stream.
size_t charIndex_ = 0;
///Encoding of the input stream.
Encoding encoding_;
///Current line in file.
//Current line in file.
uint line_;
///Current column in file.
//Current column in file.
uint column_;
///Number of bytes still available (not read) in the stream.
size_t available_;
///Capacity of raw buffers.
static immutable bufferLength8_ = 8;
///Capacity of raw buffers.
static immutable bufferLength16_ = bufferLength8_ / 2;
union
{
///Buffer to hold UTF-8 data before decoding.
char[bufferLength8_ + 1] rawBuffer8_;
///Buffer to hold UTF-16 data before decoding.
wchar[bufferLength16_ + 1] rawBuffer16_;
}
///Number of elements held in the used raw buffer.
uint rawUsed_ = 0;
//Decoder reading data from file and decoding it to UTF-32.
UTFFastDecoder decoder_;
public:
/**
* Construct a Reader.
/*
* Construct a Reader.
*
* Params: stream = Input stream. Must be readable and seekable.
*
@ -89,51 +74,14 @@ final class Reader
body
{
stream_ = new EndianStream(stream);
available_ = stream_.available;
//handle files short enough not to have a BOM
if(available_ < 2)
{
encoding_ = Encoding.UTF_8;
return;
decoder_ = UTFFastDecoder(stream_);
}
//readBOM will determine and set stream endianness
switch(stream_.readBOM(2))
{
case -1:
//readBOM() eats two more bytes in this case so get them back
const wchar bytes = stream_.getcw();
rawBuffer8_[0] = cast(char)(bytes % 256);
rawBuffer8_[1] = cast(char)(bytes / 256);
rawUsed_ = 2;
goto case 0;
case 0: encoding_ = Encoding.UTF_8; break;
case 1, 2:
//readBOM() eats two more bytes in this case so get them back
encoding_ = Encoding.UTF_16;
rawBuffer16_[0] = stream_.getcw();
rawUsed_ = 1;
enforce(available_ % 2 == 0,
new ReaderException("Odd byte count in an UTF-16 stream"));
break;
case 3, 4:
enforce(available_ % 4 == 0,
new ReaderException("Byte count in an UTF-32 stream not divisible by 4"));
encoding_ = Encoding.UTF_32;
break;
default: assert(false, "Unknown UTF BOM");
}
available_ = stream_.available;
auto ptr = cast(dchar*)core.stdc.stdlib.malloc(dchar.sizeof * 256);
bufferAllocated_ = ptr[0 .. 256];
}
///Destroy the Reader.
~this()
{
core.stdc.stdlib.free(bufferAllocated_.ptr);
//Delete the buffer, if allocated.
if(bufferAllocated_ is null){return;}
free(bufferAllocated_.ptr);
buffer_ = bufferAllocated_ = null;
}
@ -148,12 +96,13 @@ final class Reader
* Throws: ReaderException if trying to read past the end of the stream
* or if invalid data is read.
*/
dchar peek(in size_t index = 0)
dchar peek(size_t index = 0)
{
if(buffer_.length <= bufferOffset_ + index + 1)
if(buffer_.length < bufferOffset_ + index + 1)
{
updateBuffer(index + 1);
}
if(buffer_.length <= bufferOffset_ + index)
{
throw new ReaderException("Trying to read past the end of the stream");
@ -172,7 +121,7 @@ final class Reader
*
* Returns: Characters starting at current position or an empty slice if out of bounds.
*/
const(dstring) prefix(in size_t length)
const(dstring) prefix(size_t length)
{
return slice(0, length);
}
@ -194,12 +143,12 @@ final class Reader
{
updateBuffer(end);
}
end += bufferOffset_;
start += bufferOffset_;
end = min(buffer_.length, end);
if(end <= start){return "";}
return cast(dstring)buffer_[start .. end];
return end > start ? cast(dstring)buffer_[start .. end] : "";
}
/**
@ -227,7 +176,7 @@ final class Reader
* Throws: ReaderException if trying to read past the end of the stream
* or if invalid data is read.
*/
dstring get(in size_t length)
dstring get(size_t length)
{
auto result = prefix(length).dup;
forward(length);
@ -244,13 +193,13 @@ final class Reader
*/
void forward(size_t length = 1)
{
mixin FastCharSearch!"\n\u0085\u2028\u2029"d search;
if(buffer_.length <= bufferOffset_ + length + 1)
{
updateBuffer(length + 1);
}
mixin FastCharSearch!"\n\u0085\u2028\u2029"d search;
while(length > 0)
{
const c = buffer_[bufferOffset_];
@ -268,19 +217,19 @@ final class Reader
}
///Get a string describing current stream position, used for error messages.
@property Mark mark() const {return Mark(line_, column_);}
@property final Mark mark() const {return Mark(line_, column_);}
///Get current line number.
@property uint line() const {return line_;}
@property final uint line() const {return line_;}
///Get current line number.
@property uint column() const {return column_;}
///Get current column number.
@property final uint column() const {return column_;}
///Get index of the current character in the stream.
@property size_t charIndex() const {return charIndex_;}
@property final size_t charIndex() const {return charIndex_;}
///Get encoding of the input stream.
@property Encoding encoding() const {return encoding_;}
@property final Encoding encoding() const {return decoder_.encoding;}
private:
/**
@ -296,7 +245,7 @@ final class Reader
*/
void updateBuffer(in size_t length)
{
//get rid of unneeded data in the buffer
//Get rid of unneeded data in the buffer.
if(bufferOffset_ > 0)
{
size_t bufferLength = buffer_.length - bufferOffset_;
@ -306,12 +255,12 @@ final class Reader
bufferOffset_ = 0;
}
////Load chars in batches of at most 1024 bytes (256 chars)
//Load chars in batches of at most 1024 bytes (256 chars)
while(buffer_.length <= bufferOffset_ + length)
{
loadChars(256);
loadChars(512);
if(done)
if(decoder_.done)
{
if(buffer_.length == 0 || buffer_[$ - 1] != '\0')
{
@ -325,9 +274,11 @@ final class Reader
}
/**
* Load at most specified number of characters.
* Load more characters to the buffer.
*
* Params: chars = Maximum number of characters to load.
* Params: chars = Recommended number of characters to load.
* More characters might be loaded.
* Less will be loaded if not enough available.
*
* Throws: ReaderException on Unicode decoding error,
* if nonprintable characters are detected, or
@ -335,96 +286,35 @@ final class Reader
*/
void loadChars(size_t chars)
{
///Get next character from the stream.
dchar getDChar()
{
final switch(encoding_)
{
case Encoding.UTF_8:
//Temp buffer for moving data in rawBuffer8_.
char[bufferLength8_] temp;
//Shortcut for ASCII.
if(rawUsed_ > 0 && rawBuffer8_[0] < 128)
{
//Get the first byte (one char in ASCII).
const dchar result = rawBuffer8_[0];
--rawUsed_;
//Move the data.
*(cast(ulong*)temp.ptr) = *(cast(ulong*)(rawBuffer8_.ptr + 1));
*(cast(ulong*)rawBuffer8_.ptr) = *(cast(ulong*)temp.ptr);
return result;
}
//Bytes to read.
const readBytes = min(available_, bufferLength8_ - rawUsed_);
available_ -= readBytes;
//Length of data in rawBuffer8_ after reading.
const len = rawUsed_ + readBytes;
//Read the data.
stream_.readExact(rawBuffer8_.ptr + rawUsed_, readBytes);
//After decoding, this will point to the first byte not decoded.
size_t idx = 0;
const dchar result = decode(rawBuffer8_, idx);
rawUsed_ = cast(uint)(len - idx);
//Move the data.
temp[0 .. rawUsed_] = rawBuffer8_[idx .. len];
rawBuffer8_[0 .. rawUsed_] = temp[0 .. rawUsed_];
return result;
case Encoding.UTF_16:
//Temp buffer for moving data in rawBuffer8_.
wchar[bufferLength16_] temp;
//Words to read.
size_t readWords = min(available_ / 2, bufferLength16_ - rawUsed_);
available_ -= readWords * 2;
//Length of data in rawBuffer16_ after reading.
size_t len = rawUsed_;
//Read the data.
while(readWords > 0)
{
//Due to a bug in std.stream, we have to use getcw here.
rawBuffer16_[len] = stream_.getcw();
--readWords;
++len;
}
//After decoding, this will point to the first word not decoded.
size_t idx = 0;
const dchar result = decode(rawBuffer16_, idx);
rawUsed_ = cast(uint)(len - idx);
//Move the data.
temp[0 .. rawUsed_] = rawBuffer16_[idx .. len];
rawBuffer16_[0 .. rawUsed_] = temp[0 .. rawUsed_];
return result;
case Encoding.UTF_32:
dchar result;
available_ -= 4;
stream_.read(result);
return result;
}
}
const oldLength = buffer_.length;
const oldPosition = stream_.position;
//Preallocating memory to limit GC reallocations.
bufferReserve(buffer_.length + chars);
buffer_ = bufferAllocated_[0 .. buffer_.length + chars];
scope(exit)
scope(success)
{
buffer_ = buffer_[0 .. $ - chars];
enforce(printable(buffer_[oldLength .. $]),
new ReaderException("Special unicode characters are not allowed"));
}
try for(uint c = 0; chars; --chars, ++c)
try for(size_t c = 0; chars && !decoder_.done;)
{
if(done){break;}
buffer_[oldLength + c] = getDChar();
const slice = decoder_.getDChars(chars);
buffer_[oldLength + c .. oldLength + c + slice.length] = slice;
c += slice.length;
chars -= slice.length;
}
catch(Exception e)
{
handleLoadCharsException(e, oldPosition);
}
}
//Handle an exception thrown in loadChars method of any Reader.
void handleLoadCharsException(Exception e, size_t oldPosition)
{
try{throw e;}
catch(UtfException e)
{
const position = stream_.position;
@ -437,15 +327,307 @@ final class Reader
}
}
/**
//Shared implementation of the loadEntireFile methods.
//
//Decodes everything the decoder can provide into the buffer and makes sure
//the buffer ends with a '\0' character.
void loadEntireFile_()
{
    //Upper bound on the number of characters the decoder can produce.
    const charCount = decoder_.maxChars;
    //Reserve one extra slot for the terminating '\0'.
    bufferReserve(charCount + 1);
    loadChars(charCount);

    //Nothing more to do if the buffer is already zero terminated.
    const terminated = buffer_.length > 0 && buffer_[$ - 1] == '\0';
    if(terminated){return;}

    //Grow the used part of the allocated buffer by one and terminate it.
    const newLength = buffer_.length + 1;
    buffer_ = bufferAllocated_[0 .. newLength];
    buffer_[$ - 1] = '\0';
}
//Ensure there is space for at least capacity characters in bufferAllocated_.
//
//Does nothing if bufferAllocated_ is already large enough. Otherwise the
//buffer is (re)allocated and buffer_ is re-sliced from the new allocation
//with its length unchanged.
//
//Params: capacity = Minimum number of characters bufferAllocated_ must hold.
//
//Throws: OutOfMemoryError if the C allocator fails to provide the memory.
void bufferReserve(in size_t capacity)
{
    import core.exception : onOutOfMemoryError;

    if(bufferAllocated_ !is null && bufferAllocated_.length >= capacity){return;}

    //Handle first allocation as well as reallocation.
    auto ptr = bufferAllocated_ !is null
               ? realloc(bufferAllocated_.ptr, capacity * dchar.sizeof)
               : malloc(capacity * dchar.sizeof);
    //A failed realloc leaves the old block valid, so bail out before the
    //members are overwritten; slicing a null pointer to capacity elements
    //would only crash later, far from the cause.
    //(A zero-sized request may legitimately return null.)
    if(ptr is null && capacity > 0){onOutOfMemoryError();}

    bufferAllocated_ = (cast(dchar*)ptr)[0 .. capacity];
    buffer_ = bufferAllocated_[0 .. buffer_.length];
}
}
private:
alias UTFBlockDecoder!512 UTFFastDecoder;
///Decodes streams to UTF-32 in blocks.
///
///bufferSize_ is the size of the raw input buffer in bytes, and also the number
///of decoded characters that can be buffered. It must be even so that the raw
///buffer can be viewed as an array of UTF-16 code units.
struct UTFBlockDecoder(size_t bufferSize_) if (bufferSize_ % 2 == 0)
{
    private:
        //UTF-8 codepoint strides (0xFF are codepoints that can't start a sequence).
        static immutable ubyte[256] utf8Stride =
        [
            1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
            1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
            1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
            1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
            1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
            1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
            1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
            1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
            0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
            0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
            0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
            0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
            2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
            2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
            3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
            4,4,4,4,4,4,4,4,5,5,5,5,6,6,0xFF,0xFF,
        ];

        //Encoding of the input stream.
        Encoding encoding_;
        //Maximum number of characters that might be in the stream.
        size_t maxChars_;
        //Bytes available in the stream.
        size_t available_;
        //Input stream.
        EndianStream stream_;

        //Buffer used to store raw UTF-8 or UTF-16 code points.
        union
        {
            char[bufferSize_] rawBuffer8_;
            wchar[bufferSize_ / 2] rawBuffer16_;
        }
        //Used space (in items) in rawBuffer8_/rawBuffer16_.
        size_t rawUsed_;

        //Space used by buffer_.
        dchar[bufferSize_] bufferSpace_;
        //Buffer of decoded, UTF-32 characters. This is a slice into bufferSpace_.
        dchar[] buffer_;

    public:
        /**
         * Construct a UTFFastDecoder decoding a stream.
         *
         * Detects the encoding (and stream endianness) from the BOM, if any.
         *
         * Params:  stream = Stream to decode.
         *
         * Throws:  ReaderException if the byte count of an UTF-16/UTF-32
         *          stream is not a multiple of the code unit size.
         */
        this(EndianStream stream)
        {
            stream_ = stream;
            available_ = stream_.available;

            //Handle files short enough not to have a BOM.
            if(available_ < 2)
            {
                encoding_ = Encoding.UTF_8;
                maxChars_ = 0;

                if(available_ == 1)
                {
                    //Queue the byte as raw UTF-8 so it is validated and decoded
                    //by the normal updateBuffer() path, and mark the stream as
                    //consumed. Leaving available_ at 1 would keep done() false
                    //forever and make updateBuffer() read past the end.
                    rawBuffer8_[0] = stream_.getc();
                    rawUsed_ = 1;
                    available_ = 0;
                    maxChars_ = 1;
                }
                return;
            }

            //readBOM will determine and set stream endianness.
            switch(stream_.readBOM(2))
            {
                case -1:
                    //readBOM() eats two more bytes in this case so get them back.
                    const wchar bytes = stream_.getcw();
                    rawBuffer8_[0 .. 2] = [cast(ubyte)(bytes % 256), cast(ubyte)(bytes / 256)];
                    rawUsed_ = 2;
                    goto case 0;
                case 0:
                    maxChars_ = available_;
                    encoding_ = Encoding.UTF_8;
                    break;
                case 1, 2:
                    maxChars_ = available_ / 2;
                    //readBOM() eats two more bytes in this case so get them back.
                    encoding_ = Encoding.UTF_16;
                    rawBuffer16_[0] = stream_.getcw();
                    rawUsed_ = 1;
                    enforce(available_ % 2 == 0,
                            new ReaderException("Odd byte count in an UTF-16 stream"));
                    break;
                case 3, 4:
                    maxChars_ = available_ / 4;
                    encoding_ = Encoding.UTF_32;
                    enforce(available_ % 4 == 0,
                            new ReaderException("Byte count in an UTF-32 stream not divisible by 4"));
                    break;
                default: assert(false, "Unknown UTF BOM");
            }

            //Bytes consumed above (BOM, re-read code units) are no longer available.
            available_ = stream_.available;
        }

        ///Get maximum number of characters that might be in the stream.
        @property size_t maxChars() const {return maxChars_;}

        ///Get encoding we're decoding from.
        @property Encoding encoding() const {return encoding_;}

        ///Are we done decoding?
        @property bool done() const
        {
            //Done only when nothing is left in the stream, the raw buffer
            //or the decoded buffer.
            return rawUsed_ == 0 && buffer_.length == 0 && available_ == 0;
        }

        ///Get next character. Must not be called when done.
        dchar getDChar()
        {
            if(buffer_.length)
            {
                const result = buffer_[0];
                buffer_ = buffer_[1 .. $];
                return result;
            }

            //Decoded buffer is empty; refill it and retry.
            assert(available_ > 0 || rawUsed_ > 0);
            updateBuffer();
            return getDChar();
        }

        ///Get as many characters as possible, but at most maxChars. Slice returned will be invalidated in further calls.
        const(dchar[]) getDChars(size_t maxChars = size_t.max)
        {
            if(buffer_.length)
            {
                const slice = min(buffer_.length, maxChars);
                const result = buffer_[0 .. slice];
                buffer_ = buffer_[slice .. $];
                return result;
            }

            //Decoded buffer is empty; refill it and retry.
            assert(available_ > 0 || rawUsed_ > 0);
            updateBuffer();
            return getDChars(maxChars);
        }

    private:
        //Read and decode characters from file and store them in the buffer.
        //
        //Must only be called when the decoded buffer is empty.
        void updateBuffer()
        {
            assert(buffer_.length == 0);
            final switch(encoding_)
            {
                case Encoding.UTF_8:
                    //Fill the free part of the raw buffer (may be 0 bytes at end of stream).
                    const bytes = min(bufferSize_ - rawUsed_, available_);
                    //Current length of valid data in rawBuffer8_.
                    const rawLength = rawUsed_ + bytes;
                    stream_.readExact(rawBuffer8_.ptr + rawUsed_, bytes);
                    available_ -= bytes;
                    decodeRawBuffer(rawBuffer8_, rawLength);
                    break;

                case Encoding.UTF_16:
                    const words = min((bufferSize_ / 2) - rawUsed_, available_ / 2);
                    //Current length of valid data in rawBuffer16_.
                    const rawLength = rawUsed_ + words;
                    foreach(c; rawUsed_ .. rawLength)
                    {
                        stream_.read(rawBuffer16_[c]);
                        available_ -= 2;
                    }
                    decodeRawBuffer(rawBuffer16_, rawLength);
                    break;

                case Encoding.UTF_32:
                    //UTF-32 needs no decoding; read straight into the decoded buffer.
                    const chars = min(bufferSize_ / 4, available_ / 4);
                    foreach(c; 0 .. chars)
                    {
                        stream_.read(bufferSpace_[c]);
                        available_ -= 4;
                    }
                    buffer_ = bufferSpace_[0 .. chars];
                    break;
            }
        }

        //Decode contents of a UTF-8 or UTF-16 raw buffer.
        //
        //Params:  buffer = Raw buffer (rawBuffer8_ or rawBuffer16_).
        //         length = Number of valid code units in buffer.
        //
        //Throws:  ReaderException if the buffer holds no complete sequence.
        void decodeRawBuffer(C)(C[] buffer, const size_t length)
        {
            //End of part of rawBuffer8_ that contains
            //complete characters and can be decoded.
            const end = endOfLastUTFSequence(buffer, length);
            //If end is 0, there are no full UTF-8 chars.
            //This can happen at the end of file if there is an incomplete UTF-8 sequence.
            //NOTE(review): the same message is also used for UTF-16 input.
            enforce(end > 0,
                    new ReaderException("Invalid UTF-8 character at the end of stream"));

            decodeUTF(buffer[0 .. end]);

            //After decoding, any code points not decoded go to the start of raw buffer.
            rawUsed_ = length - end;
            foreach(i; 0 .. rawUsed_){buffer[i] = buffer[i + end];}
        }

        //Determine the end of last UTF-8 or UTF-16 sequence in a raw buffer.
        //
        //Params:  buffer = Raw buffer to examine.
        //         max    = Number of valid code units in buffer.
        //
        //Returns: Index one past the last complete sequence (0 if there is none).
        size_t endOfLastUTFSequence(C)(const C[] buffer, const size_t max)
        {
            static if(is(C == char))
            {
                //Walk backwards to the last byte that can start a sequence.
                for(long end = max - 1; end >= 0; --end)
                {
                    const s = utf8Stride[buffer[end]];
                    if(s != 0xFF)
                    {
                        //If stride goes beyond end of the buffer (max), return end.
                        //Otherwise the last sequence ends at max, so we can return that.
                        //(Unless there is an invalid code point, which is
                        //caught at decoding)
                        return (s > max - end) ? cast(size_t)end : max;
                    }
                }
                return 0;
            }
            else
            {
                //Walk forward sequence by sequence until one does not fit.
                size_t end = 0;
                while(end < max)
                {
                    const s = stride(buffer, end);
                    if(s + end > max){break;}
                    end += s;
                }
                return end;
            }
        }

        //Decode a UTF-8 or UTF-16 buffer (with no incomplete sequences at the end).
        //
        //The decoded characters are stored in bufferSpace_ and exposed through buffer_.
        void decodeUTF(C)(const C[] source)
        {
            size_t bufpos = 0;
            const srclength = source.length;
            for(size_t srcpos = 0; srcpos < srclength;)
            {
                const c = source[srcpos];
                //Shortcut for ASCII: a code unit below 0x80 is the character itself.
                if(c < 0x80)
                {
                    bufferSpace_[bufpos++] = c;
                    ++srcpos;
                }
                else
                {
                    //decode() advances srcpos past the decoded sequence.
                    bufferSpace_[bufpos++] = decode(source, srcpos);
                }
            }
            buffer_ = bufferSpace_[0 .. bufpos];
        }
}
/**
* Determine if all characters in an array are printable.
*
* Params: chars = Characters to check.
*
* Returns: True if all the characters are printable, false otherwise.
*/
static bool printable(const ref dchar[] chars) pure
{
bool printable(const ref dchar[] chars) pure
{
foreach(c; chars)
{
if(!((c == 0x09 || c == 0x0A || c == 0x0D || c == 0x85) ||
@ -457,47 +639,30 @@ final class Reader
}
}
return true;
}
}
///Are we done reading?
@property bool done() const
{
return (available_ == 0 &&
((encoding_ == Encoding.UTF_8 && rawUsed_ == 0) ||
(encoding_ == Encoding.UTF_16 && rawUsed_ == 0) ||
encoding_ == Encoding.UTF_32));
}
//Unittests.
///Ensure there is space for at least capacity characters in bufferAllocated_.
void bufferReserve(in size_t capacity)
{
if(bufferAllocated_.length >= capacity){return;}
auto newPtr = core.stdc.stdlib.realloc(bufferAllocated_.ptr,
capacity * dchar.sizeof);
bufferAllocated_ = (cast(dchar*)newPtr)[0 .. capacity];
buffer_ = bufferAllocated_[0 .. buffer_.length];
}
unittest
{
writeln("D:YAML reader endian unittest");
void testEndian(R)()
{
writeln(typeid(R).toString() ~ ": endian unittest");
void endian_test(ubyte[] data, Encoding encoding_expected, Endian endian_expected)
{
auto reader = new Reader(new MemoryStream(data));
assert(reader.encoding_ == encoding_expected);
Reader reader = new R(new MemoryStream(data));
assert(reader.encoding == encoding_expected);
assert(reader.stream_.endian == endian_expected);
}
ubyte[] little_endian_utf_16 = [0xFF, 0xFE, 0x7A, 0x00];
ubyte[] big_endian_utf_16 = [0xFE, 0xFF, 0x00, 0x7A];
endian_test(little_endian_utf_16, Encoding.UTF_16, Endian.littleEndian);
endian_test(big_endian_utf_16, Encoding.UTF_16, Endian.bigEndian);
}
unittest
{
writeln("D:YAML reader peek/prefix/forward unittest");
}
void testPeekPrefixForward(R)()
{
writeln(typeid(R).toString() ~ ": peek/prefix/forward unittest");
ubyte[] data = ByteOrderMarks[BOM.UTF8] ~ cast(ubyte[])"data";
auto reader = new Reader(new MemoryStream(data));
Reader reader = new R(new MemoryStream(data));
assert(reader.peek() == 'd');
assert(reader.peek(1) == 'a');
assert(reader.peek(2) == 't');
@ -508,16 +673,17 @@ final class Reader
reader.forward(2);
assert(reader.peek(1) == 'a');
assert(collectException(reader.peek(3)));
}
unittest
{
writeln("D:YAML reader UTF formats unittest");
}
void testUTF(R)()
{
writeln(typeid(R).toString() ~ ": UTF formats unittest");
dchar[] data = cast(dchar[])"data";
void utf_test(T)(T[] data, BOM bom)
{
ubyte[] bytes = ByteOrderMarks[bom] ~
(cast(ubyte*)data.ptr)[0 .. data.length * T.sizeof];
auto reader = new Reader(new MemoryStream(bytes));
Reader reader = new R(new MemoryStream(bytes));
assert(reader.peek() == 'd');
assert(reader.peek(1) == 'a');
assert(reader.peek(2) == 't');
@ -526,5 +692,11 @@ final class Reader
utf_test!char(to!(char[])(data), BOM.UTF8);
utf_test!wchar(to!(wchar[])(data), endian == Endian.bigEndian ? BOM.UTF16BE : BOM.UTF16LE);
utf_test(data, endian == Endian.bigEndian ? BOM.UTF32BE : BOM.UTF32LE);
}
}
unittest
{
testEndian!Reader();
testPeekPrefixForward!Reader();
testUTF!Reader();
}

View file

@ -65,7 +65,7 @@ final class Representer
* disabled to use custom representer
* functions for default types.
*/
this(in bool useDefaultRepresenters = true)
this(bool useDefaultRepresenters = true)
{
if(!useDefaultRepresenters){return;}
addRepresenter!YAMLNull(&representNull);
@ -87,13 +87,13 @@ final class Representer
}
///Set default _style for scalars. Invalid means the _style is chosen automatically.
@property void defaultScalarStyle(in ScalarStyle style)
@property void defaultScalarStyle(ScalarStyle style)
{
defaultScalarStyle_ = style;
}
///Set default _style for collections. Invalid means the _style is chosen automatically.
@property void defaultCollectionStyle(in CollectionStyle style)
@property void defaultCollectionStyle(CollectionStyle style)
{
defaultCollectionStyle_ = style;
}
@ -237,7 +237,7 @@ final class Representer
* }
* --------------------
*/
Node representScalar(in string tag, string scalar,
Node representScalar(string tag, string scalar,
ScalarStyle style = ScalarStyle.Invalid)
{
if(style == ScalarStyle.Invalid){style = defaultScalarStyle_;}
@ -276,7 +276,7 @@ final class Representer
* }
* --------------------
*/
Node representSequence(in string tag, Node[] sequence,
Node representSequence(string tag, Node[] sequence,
CollectionStyle style = CollectionStyle.Invalid)
{
Node[] value;
@ -335,7 +335,7 @@ final class Representer
* }
* --------------------
*/
Node representMapping(in string tag, Node.Pair[] pairs,
Node representMapping(string tag, Node.Pair[] pairs,
CollectionStyle style = CollectionStyle.Invalid)
{
Node.Pair[] value;

View file

@ -23,7 +23,7 @@ import dyaml.exception;
import dyaml.node;
import dyaml.resolver;
import dyaml.tag;
import dyaml.tagdirectives;
import dyaml.tagdirective;
import dyaml.token;
@ -46,7 +46,7 @@ struct Serializer
string YAMLVersion_;
///Tag directives to emit.
TagDirectives tagDirectives_;
TagDirective[] tagDirectives_;
//TODO Use something with more deterministic memory usage.
///Nodes with assigned anchors.
@ -70,7 +70,7 @@ struct Serializer
*/
this(ref Emitter emitter, Resolver resolver, Encoding encoding,
in bool explicitStart, in bool explicitEnd, string YAMLVersion,
TagDirectives tagDirectives)
TagDirective[] tagDirectives)
{
emitter_ = &emitter;
resolver_ = resolver;

View file

@ -1,131 +0,0 @@
// Copyright Ferdinand Majerech 2011.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
///Shared object.
module dyaml.sharedobject;
/**
 * Mixin for shared objects (need a better name).
 *
 * This works as an index to a static array of type T. Any new object created is
 * checked for presence in the array to prevent duplication.
 *
 * This is useful for e.g. token/event data that rarely needs to be
 * stored (e.g. tag directives) to prevent inflation of these structs,
 * and when there are many instances of a data type that are mostly
 * duplicates (e.g. tags).
 *
 * This is not the most elegant way to store the extra data and might change in future.
 *
 * Params:  T       = Type of the stored objects.
 *          MixedIn = Type this template is mixed into (used by opEquals).
 */
template SharedObject(T, MixedIn)
{
    private:
        ///This class stores the data that is shared between the objects.
        class SharedData
        {
            private:
                /**
                 * Reference count.
                 *
                 * When this reaches zero, objects_ are cleared. This is not
                 * the number of shared objects, but rather of objects using this kind
                 * of shared object.
                 */
                int referenceCount_ = 0;

                ///All known objects of type T are in this array.
                T[] objects_;

            public:
                ///Increment the reference count.
                void addReference()
                {
                    assert(referenceCount_ >= 0);
                    ++referenceCount_;
                }

                ///Decrement the reference count and clear the constructed objects if zero.
                void removeReference()
                {
                    --referenceCount_;
                    assert(referenceCount_ >= 0);
                    if(referenceCount_ == 0)
                    {
                        clear(objects_);
                        objects_ = [];
                    }
                }

                ///Add an object and return its index. An equal, already known object is reused.
                uint add(ref T object)
                {
                    foreach(index, ref known; objects_) if(object == known)
                    {
                        return cast(uint)index;
                    }
                    objects_ ~= object;
                    return cast(uint)objects_.length - 1;
                }

                ///Get the object at specified index.
                @property T get(in uint index)
                {
                    return objects_[index];
                }
        }

        ///Index of the object in data_. uint.max means null (no object).
        uint index_ = uint.max;

        ///Stores the actual objects.
        static __gshared SharedData data_;

        //data_ is __gshared (one instance per process), so it must be created
        //exactly once. A plain "static this()" runs again in every new thread
        //and would replace the store, invalidating all existing indices.
        shared static this()
        {
            data_ = new SharedData;
        }

    public:
        ///Increment the reference count.
        static void addReference()
        {
            synchronized(data_){data_.addReference();}
        }

        ///Decrement the reference count and clear the constructed objects if zero.
        static void removeReference()
        {
            synchronized(data_){data_.removeReference();}
        }

        ///Get the object.
        @property T get() const
        in{assert(!isNull());}
        body
        {
            T result;
            synchronized(data_){result = data_.get(index_);}
            return result;
        }

        ///Test for equality with another object. Only the indices are compared.
        bool opEquals(const ref MixedIn object) const
        {
            return object.index_ == index_;
        }

        ///Is this object null (invalid)?
        @property bool isNull() const {return index_ == uint.max;}

    private:
        ///Add a new object, checking if identical object already exists.
        void add(ref T object)
        {
            synchronized(data_){index_ = data_.add(object);}
        }
}

View file

@ -7,66 +7,7 @@
///YAML tag.
module dyaml.tag;
import dyaml.zerostring;
import core.stdc.string;
///YAML tag (data type) struct. Encapsulates a tag to save memory and speed-up comparison.
struct Tag
{
private:
///Zero terminated tag string.
immutable(char)* tag_ = null;
public:
@disable int opCmp(ref Tag);
///Construct a tag from a string representation.
this(in string tag)
{
if(tag is null || tag == "")
{
tag_ = null;
return;
}
tag_ = (tag ~ '\0').ptr;
}
///Get the tag string.
@property string get() const
in{assert(!isNull());}
body
{
return cast(string)tag_[0 .. strlen(tag_)];
}
///Test for equality with another tag.
bool opEquals(const ref Tag tag) const
{
return isNull ? tag.isNull :
tag.isNull ? false : (0 == strcmp(tag_, tag.tag_));
}
///Compute a hash.
hash_t toHash() const
in{assert(!isNull);}
body
{
static type = typeid(string);
auto str = get();
return type.getHash(&str);
}
///Compare with another tag.
int opCmp(const ref Tag tag) const
in{assert(!isNull && !tag.isNull);}
body
{
return strcmp(tag_, tag.tag_);
}
///Is this tag null (invalid)?
@property bool isNull() const {return tag_ is null;}
}
///YAML tag (data type) struct. Encapsulates a tag to save memory and speed up comparison.
alias ZeroString!"Tag" Tag;

15
dyaml/tagdirective.d Normal file
View file

@ -0,0 +1,15 @@
// Copyright Ferdinand Majerech 2011.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
///Tag directives.
module dyaml.tagdirective;
///Single tag directive. handle is the shortcut, prefix is the prefix that replaces it.
struct TagDirective
{
///Shortcut used in place of the prefix.
string handle;
///Prefix that replaces the handle.
string prefix;
}

View file

@ -1,28 +0,0 @@
// Copyright Ferdinand Majerech 2011.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
///Tag directives.
module dyaml.tagdirectives;
import std.typecons;
import dyaml.sharedobject;
///Single tag directive. handle is the shortcut, prefix is the prefix that replaces it.
alias Tuple!(string, "handle", string, "prefix") tagDirective;
///Tag directives stored in Event.
///
///The struct holds only what the SharedObject mixin adds to it; the
///tagDirective[] array itself is registered through the mixin's add().
struct TagDirectives
{
public:
//Mixes in the storage and accessors from dyaml.sharedobject, instantiated
//for arrays of tag directives.
mixin SharedObject!(tagDirective[], TagDirectives);
///Construct a tags object from an array of tag directives.
///
///Params: tagDirectives = Tag directives to store.
this(tagDirective[] tagDirectives)
{
//add() is provided by the SharedObject mixin.
add(tagDirectives);
}
}

73
dyaml/zerostring.d Normal file
View file

@ -0,0 +1,73 @@
// Copyright Ferdinand Majerech 2011.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
///Zero terminated string.
module dyaml.zerostring;
import core.stdc.string;
/**
 * Zero terminated string used to decrease data structure size.
 *
 * Only a single pointer is stored; the length is recomputed with strlen()
 * whenever the string is read. Null and empty strings are both stored
 * as a null pointer.
 *
 * TypeName is used to differentiate types (better than simple alias).
 */
struct ZeroString(string TypeName)
{
    private:
        ///Pointer to the zero terminated data, null for a null/empty string.
        immutable(char)* str_ = null;

    public:
        @disable int opCmp(ref ZeroString);

        ///Construct a string. A null or empty str results in a null ZeroString.
        this(in string str)
        {
            //A zero terminated copy is only made for non-empty input.
            str_ = str.length == 0 ? null : (str ~ '\0').ptr;
        }

        ///Get the string. Must not be called on a null ZeroString.
        @property string get() const
        in{assert(!isNull());}
        body
        {
            const length = strlen(str_);
            return cast(string)str_[0 .. length];
        }

        ///Test for equality with another string. Two null strings are equal.
        bool opEquals(const ref ZeroString str) const
        {
            //If either side is null, they are equal only when both are null.
            if(isNull || str.isNull){return isNull && str.isNull;}
            return strcmp(str_, str.str_) == 0;
        }

        ///Compute a hash. Must not be called on a null ZeroString.
        hash_t toHash() const
        in{assert(!isNull);}
        body
        {
            //Hash the same way a D string with identical contents would be hashed.
            auto text = get();
            return typeid(string).getHash(&text);
        }

        ///Compare with another string. Neither string may be null.
        int opCmp(const ref ZeroString str) const
        in{assert(!isNull && !str.isNull);}
        body
        {
            return strcmp(str_, str.str_);
        }

        ///Is this string null (invalid)?
        @property bool isNull() const {return str_ is null;}
}

View file

@ -42,8 +42,8 @@ void testParser(bool verbose, string dataFilename, string canonicalFilename)
*/
void testLoader(bool verbose, string dataFilename, string canonicalFilename)
{
auto data = Loader(dataFilename).loadAll;
auto canonical = Loader(canonicalFilename).loadAll;
auto data = Loader(dataFilename).loadAll();
auto canonical = Loader(canonicalFilename).loadAll();
assert(data.length == canonical.length, "Unequal node count");
foreach(n; 0 .. data.length)
@ -58,7 +58,7 @@ void testLoader(bool verbose, string dataFilename, string canonicalFilename)
writeln("Canonical value:");
writeln(canonical[n].debugString);
}
assert(false);
assert(false, "testLoader(" ~ dataFilename ~ ", " ~ canonicalFilename ~ ") failed");
}
}
}

View file

@ -83,7 +83,7 @@ void testEmitterOnData(bool verbose, string dataFilename, string canonicalFilena
{
//Must exist due to Anchor, Tags reference counts.
auto loader = Loader(dataFilename);
auto events = loader.parse();
auto events = cast(Event[])loader.parse();
auto emitStream = new MemoryStream;
Dumper(emitStream).emit(events);
@ -98,7 +98,7 @@ void testEmitterOnData(bool verbose, string dataFilename, string canonicalFilena
loader2.name = "TEST";
loader2.constructor = new Constructor;
loader2.resolver = new Resolver;
auto newEvents = loader2.parse();
auto newEvents = cast(Event[])loader2.parse();
assert(compareEvents(events, newEvents));
}
@ -114,7 +114,7 @@ void testEmitterOnCanonical(bool verbose, string canonicalFilename)
{
//Must exist due to Anchor, Tags reference counts.
auto loader = Loader(canonicalFilename);
auto events = loader.parse();
auto events = cast(Event[])loader.parse();
foreach(canonical; [false, true])
{
auto emitStream = new MemoryStream;
@ -130,7 +130,7 @@ void testEmitterOnCanonical(bool verbose, string canonicalFilename)
loader2.name = "TEST";
loader2.constructor = new Constructor;
loader2.resolver = new Resolver;
auto newEvents = loader2.parse();
auto newEvents = cast(Event[])loader2.parse();
assert(compareEvents(events, newEvents));
}
}
@ -151,7 +151,7 @@ void testEmitterStyles(bool verbose, string dataFilename, string canonicalFilena
{
//must exist due to Anchor, Tags reference counts
auto loader = Loader(canonicalFilename);
auto events = loader.parse();
auto events = cast(Event[])loader.parse();
foreach(flowStyle; [CollectionStyle.Block, CollectionStyle.Flow])
{
foreach(style; [ScalarStyle.Literal, ScalarStyle.Folded,
@ -191,7 +191,7 @@ void testEmitterStyles(bool verbose, string dataFilename, string canonicalFilena
loader2.name = "TEST";
loader2.constructor = new Constructor;
loader2.resolver = new Resolver;
auto newEvents = loader2.parse();
auto newEvents = cast(Event[])loader2.parse();
assert(compareEvents(events, newEvents));
}
}

View file

@ -72,7 +72,8 @@ void testLoaderErrorFilename(bool verbose, string errorFilename)
if(verbose){writeln(typeid(e).toString(), "\n", e);}
return;
}
assert(false, "Expected an exception");
assert(false, "testLoaderErrorSingle(" ~ to!string(verbose) ~
", " ~ errorFilename ~ ") Expected an exception");
}
/**