Reader was reimplemented.

After experiments with loading the whole file at once and with
decoding and parsing in a separate thread, a lazy reader turned out
to be the fastest and least memory-intensive solution.
Characters are now decoded in small batches.
This improved parsing speed by ~20%.
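
The idea behind the batched decoding, as a minimal D sketch (LazyDecoder,
batchSize and the plain string input are illustrative only, not the actual
dyaml.reader implementation):

import std.utf : decode;

struct LazyDecoder
{
    private string input;     //raw UTF-8 input (a stream in the real Reader)
    private dchar[] buffer;   //characters decoded so far
    private size_t pos;       //decode position within input
    enum batchSize = 256;     //decode at most this many characters per batch

    //Make sure at least `needed` decoded characters are buffered,
    //decoding one small batch at a time instead of the whole input.
    void ensure(size_t needed)
    {
        while(buffer.length < needed && pos < input.length)
        {
            foreach(i; 0 .. batchSize)
            {
                if(pos >= input.length){break;}
                buffer ~= decode(input, pos); //advances pos past one code point
            }
        }
    }

    //Peek at the index-th buffered character, decoding more if needed.
    //(Range error if past the end; the real Reader throws a ReaderException.)
    dchar peek(size_t index = 0)
    {
        ensure(index + 1);
        return buffer[index];
    }
}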

There is no global state anymore. Anchors are now zero-terminated strings
and tag directives are a plain array (TagDirective[]). The Event structure
was changed to avoid increasing its size.
Minor fixes and improvements.
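
A rough sketch of the two data-layout changes: the real ZeroString template
lives in dyaml.zerostring and is not shown in this diff, so the Anchor layout
below is an assumption, while the TagDirective handle/prefix fields match
their use in the diff.

//Assumed layout: an anchor stored as a zero-terminated string instead of
//an index into a shared, reference-counted table as before.
struct Anchor
{
    private immutable(char)* str;  //'\0'-terminated; null means "no anchor"

    this(string anchor)
    {
        if(anchor.length > 0){str = (anchor ~ '\0').ptr;}
    }

    @property bool isNull() const {return str is null;}
}

//Tag directives as a plain array of handle/prefix pairs, as used in the diff.
struct TagDirective
{
    string handle;  //e.g. "!!"
    string prefix;  //e.g. "tag:yaml.org,2002:"
}

TagDirective[] defaults = [TagDirective("!", "!"),
                           TagDirective("!!", "tag:yaml.org,2002:")];
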
This commit is contained in:
Ferdinand Majerech 2011-11-16 03:10:29 +01:00
parent f95f0d14c8
commit 2c9d464389
28 changed files with 707 additions and 618 deletions

Binary file not shown.

@ -112,57 +112,57 @@
<dd><p>Set stream name. Used in debugging messages.</p> <dd><p>Set stream name. Used in debugging messages.</p>
</dd> </dd>
<dt class="d_decl">void <a name="resolver"></a><span class="ddoc_psymbol">resolver</span>(Resolver <a name="resolver"></a><span class="ddoc_psymbol">resolver</span>); <dt class="d_decl">@property void <a name="resolver"></a><span class="ddoc_psymbol">resolver</span>(Resolver <a name="resolver"></a><span class="ddoc_psymbol">resolver</span>);
</dt> </dt>
<dd><p>Specify custom Resolver to use.</p> <dd><p>Specify custom Resolver to use.</p>
</dd> </dd>
<dt class="d_decl">void <a name="representer"></a><span class="ddoc_psymbol">representer</span>(Representer <a name="representer"></a><span class="ddoc_psymbol">representer</span>); <dt class="d_decl">@property void <a name="representer"></a><span class="ddoc_psymbol">representer</span>(Representer <a name="representer"></a><span class="ddoc_psymbol">representer</span>);
</dt> </dt>
<dd><p>Specify custom Representer to use.</p> <dd><p>Specify custom Representer to use.</p>
</dd> </dd>
<dt class="d_decl">void <a name="canonical"></a><span class="ddoc_psymbol">canonical</span>(in bool <a name="canonical"></a><span class="ddoc_psymbol">canonical</span>); <dt class="d_decl">@property void <a name="canonical"></a><span class="ddoc_psymbol">canonical</span>(bool <a name="canonical"></a><span class="ddoc_psymbol">canonical</span>);
</dt> </dt>
<dd><p>Write scalars in canonical form?</p> <dd><p>Write scalars in canonical form?</p>
</dd> </dd>
<dt class="d_decl">void <a name="indent"></a><span class="ddoc_psymbol">indent</span>(in uint <a name="indent"></a><span class="ddoc_psymbol">indent</span>); <dt class="d_decl">@property void <a name="indent"></a><span class="ddoc_psymbol">indent</span>(uint <a name="indent"></a><span class="ddoc_psymbol">indent</span>);
</dt> </dt>
<dd><p>Set indentation width. 2 by default. Must not be zero.</p> <dd><p>Set indentation width. 2 by default. Must not be zero.</p>
</dd> </dd>
<dt class="d_decl">void <a name="textWidth"></a><span class="ddoc_psymbol">textWidth</span>(in uint <b>width</b>); <dt class="d_decl">@property void <a name="textWidth"></a><span class="ddoc_psymbol">textWidth</span>(uint <b>width</b>);
</dt> </dt>
<dd><p>Set preferred text width.</p> <dd><p>Set preferred text width.</p>
</dd> </dd>
<dt class="d_decl">void <a name="lineBreak"></a><span class="ddoc_psymbol">lineBreak</span>(in LineBreak <a name="lineBreak"></a><span class="ddoc_psymbol">lineBreak</span>); <dt class="d_decl">@property void <a name="lineBreak"></a><span class="ddoc_psymbol">lineBreak</span>(LineBreak <a name="lineBreak"></a><span class="ddoc_psymbol">lineBreak</span>);
</dt> </dt>
<dd><p>Set line break to use. Unix by default.</p> <dd><p>Set line break to use. Unix by default.</p>
</dd> </dd>
<dt class="d_decl">void <a name="encoding"></a><span class="ddoc_psymbol">encoding</span>(in Encoding <a name="encoding"></a><span class="ddoc_psymbol">encoding</span>); <dt class="d_decl">@property void <a name="encoding"></a><span class="ddoc_psymbol">encoding</span>(Encoding <a name="encoding"></a><span class="ddoc_psymbol">encoding</span>);
</dt> </dt>
<dd><p>Set character encoding to use. UTF-8 by default.</p> <dd><p>Set character encoding to use. UTF-8 by default.</p>
</dd> </dd>
<dt class="d_decl">void <a name="explicitStart"></a><span class="ddoc_psymbol">explicitStart</span>(in bool <b>explicit</b>); <dt class="d_decl">@property void <a name="explicitStart"></a><span class="ddoc_psymbol">explicitStart</span>(bool <b>explicit</b>);
</dt> </dt>
<dd><p>Always explicitly write document start?</p> <dd><p>Always explicitly write document start?</p>
</dd> </dd>
<dt class="d_decl">void <a name="explicitEnd"></a><span class="ddoc_psymbol">explicitEnd</span>(in bool <b>explicit</b>); <dt class="d_decl">@property void <a name="explicitEnd"></a><span class="ddoc_psymbol">explicitEnd</span>(bool <b>explicit</b>);
</dt> </dt>
<dd><p>Always explicitly write document end?</p> <dd><p>Always explicitly write document end?</p>
</dd> </dd>
<dt class="d_decl">void <a name="YAMLVersion"></a><span class="ddoc_psymbol">YAMLVersion</span>(in string <a name="YAMLVersion"></a><span class="ddoc_psymbol">YAMLVersion</span>); <dt class="d_decl">@property void <a name="YAMLVersion"></a><span class="ddoc_psymbol">YAMLVersion</span>(string <a name="YAMLVersion"></a><span class="ddoc_psymbol">YAMLVersion</span>);
</dt> </dt>
<dd><p>Specify YAML version string. "1.1" by default.</p> <dd><p>Specify YAML version string. "1.1" by default.</p>
</dd> </dd>
<dt class="d_decl">void <a name="tagDirectives"></a><span class="ddoc_psymbol">tagDirectives</span>(string[string] <b>tags</b>); <dt class="d_decl">@property void <a name="tagDirectives"></a><span class="ddoc_psymbol">tagDirectives</span>(string[string] <b>tags</b>);
</dt> </dt>
<dd><p>Specify tag directives. <dd><p>Specify tag directives.
</p> </p>
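
For reference, a minimal sketch of using the Dumper setters documented above.
The import yaml line, the Node constructor and the dump() call are assumptions
based on the wider D:YAML API and are not part of this excerpt.

import yaml;  //assumed convenience module; adjust to the actual module layout

void writeExample()
{
    auto dumper = Dumper("output.yaml");  //constructor shown further below
    dumper.indent        = 4;
    dumper.canonical     = false;
    dumper.explicitStart = true;
    dumper.textWidth     = 80;
    dumper.tagDirectives = ["!my!" : "tag:example.com,2011:my/"];
    //dump() itself is not documented in this excerpt; assumed from the wider API.
    dumper.dump(Node("Hello, world!"));
}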

@ -98,11 +98,11 @@
</pre> </pre>
</div> </div>
<dl><dt class="d_decl">this(in const(immutable(char)[]) <b>filename</b>); <dl><dt class="d_decl">this(string <b>filename</b>);
</dt> </dt>
<dd><p>Construct a Loader to load YAML from a file. <dd><p>Construct a Loader to load YAML from a file.
</p> </p>
<b>Parameters:</b><div class="pbr"><table class=parms><tr><td valign=top>const(immutable(char)[]) <b>filename</b></td> <b>Parameters:</b><div class="pbr"><table class=parms><tr><td valign=top>string <b>filename</b></td>
<td valign=top>Name of the file to load from.</td></tr> <td valign=top>Name of the file to load from.</td></tr>
</table></div> </table></div>
<b>Throws:</b><div class="pbr">YAMLException if the file could not be opened or read.</div> <b>Throws:</b><div class="pbr">YAMLException if the file could not be opened or read.</div>
@ -138,6 +138,9 @@
<dd><p>Load single YAML document. <dd><p>Load single YAML document.
</p> </p>
<p>If none or more than one YAML document is found, this throws a YAMLException. <p>If none or more than one YAML document is found, this throws a YAMLException.
<br>
This can only be called once; this is enforced by contract.
</p> </p>
<b>Returns:</b><div class="pbr">Root node of the document. <b>Returns:</b><div class="pbr">Root node of the document.
@ -155,6 +158,9 @@
them all at once. Calling <a name="loadAll"></a><span class="ddoc_psymbol">loadAll</span> after iterating over the node or them all at once. Calling <a name="loadAll"></a><span class="ddoc_psymbol">loadAll</span> after iterating over the node or
vice versa will not return any documents, as they have all been parsed vice versa will not return any documents, as they have all been parsed
already. already.
<br>
This can only be called once; this is enforced by contract.
</p> </p>
<b>Returns:</b><div class="pbr">Array of root nodes of all documents in the file/stream. <b>Returns:</b><div class="pbr">Array of root nodes of all documents in the file/stream.
@ -168,6 +174,9 @@
<dd><p>Foreach over YAML documents. <dd><p>Foreach over YAML documents.
</p> </p>
<p>Parses documents lazily, when they are needed. <p>Parses documents lazily, when they are needed.
<br>
Foreach over a Loader can only be used once; this is enforced by contract.
</p> </p>
<b>Throws:</b><div class="pbr">YAMLException on a parsing error.</div> <b>Throws:</b><div class="pbr">YAMLException on a parsing error.</div>
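
Putting the documented Loader contract together, a minimal usage sketch; the
import yaml line is an assumption, the rest follows the API described above.

import yaml;  //assumed convenience module; adjust to the actual module layout

void readExamples()
{
    //Exactly one document expected; load() may only be called once per Loader.
    Node root = Loader("config.yaml").load();

    //Multiple documents: either all at once...
    Node[] documents = Loader("multi.yaml").loadAll();

    //...or lazily, one document at a time (also single-use).
    foreach(ref Node document; Loader("stream.yaml"))
    {
        //Process the document here.
    }
}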

@ -182,7 +182,7 @@
</div> </div>
</dd> </dd>
<dt class="d_decl">Node <a name="representScalar"></a><span class="ddoc_psymbol">representScalar</span>(in string <b>tag</b>, string <b>scalar</b>, ScalarStyle <b>style</b> = (ScalarStyle).Invalid); <dt class="d_decl">Node <a name="representScalar"></a><span class="ddoc_psymbol">representScalar</span>(string <b>tag</b>, string <b>scalar</b>, ScalarStyle <b>style</b> = (ScalarStyle).Invalid);
</dt> </dt>
<dd><p>Represent a scalar with specified tag. <dd><p>Represent a scalar with specified tag.
</p> </p>
@ -217,7 +217,7 @@
</p> </p>
</dd> </dd>
<dt class="d_decl">Node <a name="representSequence"></a><span class="ddoc_psymbol">representSequence</span>(in string <b>tag</b>, Node[] <b>sequence</b>, CollectionStyle <b>style</b> = (CollectionStyle).Invalid); <dt class="d_decl">Node <a name="representSequence"></a><span class="ddoc_psymbol">representSequence</span>(string <b>tag</b>, Node[] <b>sequence</b>, CollectionStyle <b>style</b> = (CollectionStyle).Invalid);
</dt> </dt>
<dd><p>Represent a sequence with specified tag, representing children first. <dd><p>Represent a sequence with specified tag, representing children first.
</p> </p>
@ -256,7 +256,7 @@
</p> </p>
</dd> </dd>
<dt class="d_decl">Node <a name="representMapping"></a><span class="ddoc_psymbol">representMapping</span>(in string <b>tag</b>, Pair[] <b>pairs</b>, CollectionStyle <b>style</b> = (CollectionStyle).Invalid); <dt class="d_decl">Node <a name="representMapping"></a><span class="ddoc_psymbol">representMapping</span>(string <b>tag</b>, Pair[] <b>pairs</b>, CollectionStyle <b>style</b> = (CollectionStyle).Invalid);
</dt> </dt>
<dd><p>Represent a mapping with specified tag, representing children first. <dd><p>Represent a mapping with specified tag, representing children first.
</p> </p>

@ -138,7 +138,7 @@ struct appears in Phobos.</p>
</div> </div>
<div class="footer"> <div class="footer">
&copy; Copyright 2011, Ferdinand Majerech. Based on PyYAML http://www.pyyaml.org by Kirill Simonov. &copy; Copyright 2011, Ferdinand Majerech. Based on PyYAML http://www.pyyaml.org by Kirill Simonov.
Last updated on Oct 30, 2011. Last updated on Nov 15, 2011.
Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 1.0.7. Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 1.0.7.
</div> </div>
</body> </body>

@ -104,7 +104,7 @@
</div> </div>
<div class="footer"> <div class="footer">
&copy; Copyright 2011, Ferdinand Majerech. Based on PyYAML http://www.pyyaml.org by Kirill Simonov. &copy; Copyright 2011, Ferdinand Majerech. Based on PyYAML http://www.pyyaml.org by Kirill Simonov.
Last updated on Oct 30, 2011. Last updated on Nov 15, 2011.
Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 1.0.7. Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 1.0.7.
</div> </div>
</body> </body>

@ -87,7 +87,7 @@
</div> </div>
<div class="footer"> <div class="footer">
&copy; Copyright 2011, Ferdinand Majerech. Based on PyYAML http://www.pyyaml.org by Kirill Simonov. &copy; Copyright 2011, Ferdinand Majerech. Based on PyYAML http://www.pyyaml.org by Kirill Simonov.
Last updated on Oct 30, 2011. Last updated on Nov 15, 2011.
Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 1.0.7. Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 1.0.7.
</div> </div>
</body> </body>

@ -368,7 +368,7 @@ directory of the D:YAML package.</p>
</div> </div>
<div class="footer"> <div class="footer">
&copy; Copyright 2011, Ferdinand Majerech. Based on PyYAML http://www.pyyaml.org by Kirill Simonov. &copy; Copyright 2011, Ferdinand Majerech. Based on PyYAML http://www.pyyaml.org by Kirill Simonov.
Last updated on Oct 30, 2011. Last updated on Nov 15, 2011.
Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 1.0.7. Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 1.0.7.
</div> </div>
</body> </body>

@ -237,7 +237,7 @@ example in the <tt class="docutils literal"><span class="pre">example/getting_st
</div> </div>
<div class="footer"> <div class="footer">
&copy; Copyright 2011, Ferdinand Majerech. Based on PyYAML http://www.pyyaml.org by Kirill Simonov. &copy; Copyright 2011, Ferdinand Majerech. Based on PyYAML http://www.pyyaml.org by Kirill Simonov.
Last updated on Oct 30, 2011. Last updated on Nov 15, 2011.
Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 1.0.7. Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 1.0.7.
</div> </div>
</body> </body>

@ -330,7 +330,7 @@ Some of these might change in the future (especially !!map and !!set).</p>
</div> </div>
<div class="footer"> <div class="footer">
&copy; Copyright 2011, Ferdinand Majerech. Based on PyYAML http://www.pyyaml.org by Kirill Simonov. &copy; Copyright 2011, Ferdinand Majerech. Based on PyYAML http://www.pyyaml.org by Kirill Simonov.
Last updated on Oct 30, 2011. Last updated on Nov 15, 2011.
Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 1.0.7. Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 1.0.7.
</div> </div>
</body> </body>

@ -7,24 +7,7 @@
///YAML anchor. ///YAML anchor.
module dyaml.anchor; module dyaml.anchor;
import dyaml.sharedobject; import dyaml.zerostring;
///YAML anchor (reference) struct. Encapsulates an anchor to save memory. ///YAML anchor (reference) struct. Encapsulates an anchor to save memory.
struct Anchor alias ZeroString!"Anchor" Anchor;
{
public:
mixin SharedObject!(string, Anchor);
///Construct an anchor from a string representation.
this(string anchor)
{
if(anchor is null || anchor == "")
{
index_ = uint.max;
return;
}
add(anchor);
}
}

@ -25,7 +25,7 @@ import dyaml.node;
import dyaml.representer; import dyaml.representer;
import dyaml.resolver; import dyaml.resolver;
import dyaml.serializer; import dyaml.serializer;
import dyaml.tagdirectives; import dyaml.tagdirective;
/** /**
@ -146,7 +146,7 @@ struct Dumper
///YAML version string. ///YAML version string.
string YAMLVersion_ = "1.1"; string YAMLVersion_ = "1.1";
///Tag directives to use. ///Tag directives to use.
TagDirectives tags_ = TagDirectives(); TagDirective[] tags_ = null;
///Always write document start? ///Always write document start?
bool explicitStart_ = false; bool explicitStart_ = false;
///Always write document end? ///Always write document end?
@ -167,7 +167,7 @@ struct Dumper
* *
* Throws: YAMLException if the file can not be dumped to (e.g. cannot be opened). * Throws: YAMLException if the file can not be dumped to (e.g. cannot be opened).
*/ */
this(in string filename) this(string filename)
{ {
name_ = filename; name_ = filename;
try{this(new File(filename, FileMode.OutNew));} try{this(new File(filename, FileMode.OutNew));}
@ -184,20 +184,16 @@ struct Dumper
resolver_ = defaultResolver_; resolver_ = defaultResolver_;
representer_ = defaultRepresenter_; representer_ = defaultRepresenter_;
stream_ = stream; stream_ = stream;
Anchor.addReference();
TagDirectives.addReference();
} }
///Destroy the Dumper. ///Destroy the Dumper.
~this() ~this()
{ {
Anchor.removeReference();
TagDirectives.removeReference();
YAMLVersion_ = null; YAMLVersion_ = null;
} }
///Set stream _name. Used in debugging messages. ///Set stream _name. Used in debugging messages.
@property void name(in string name) @property void name(string name)
{ {
name_ = name; name_ = name;
} }
@ -217,13 +213,13 @@ struct Dumper
} }
///Write scalars in _canonical form? ///Write scalars in _canonical form?
@property void canonical(in bool canonical) @property void canonical(bool canonical)
{ {
canonical_ = canonical; canonical_ = canonical;
} }
///Set indentation width. 2 by default. Must not be zero. ///Set indentation width. 2 by default. Must not be zero.
@property void indent(in uint indent) @property void indent(uint indent)
in in
{ {
assert(indent != 0, "Can't use zero YAML indent width"); assert(indent != 0, "Can't use zero YAML indent width");
@ -234,37 +230,37 @@ struct Dumper
} }
///Set preferred text _width. ///Set preferred text _width.
@property void textWidth(in uint width) @property void textWidth(uint width)
{ {
textWidth_ = width; textWidth_ = width;
} }
///Set line break to use. Unix by default. ///Set line break to use. Unix by default.
@property void lineBreak(in LineBreak lineBreak) @property void lineBreak(LineBreak lineBreak)
{ {
lineBreak_ = lineBreak; lineBreak_ = lineBreak;
} }
///Set character _encoding to use. UTF-8 by default. ///Set character _encoding to use. UTF-8 by default.
@property void encoding(in Encoding encoding) @property void encoding(Encoding encoding)
{ {
encoding_ = encoding; encoding_ = encoding;
} }
///Always explicitly write document start? ///Always explicitly write document start?
@property void explicitStart(in bool explicit) @property void explicitStart(bool explicit)
{ {
explicitStart_ = explicit; explicitStart_ = explicit;
} }
///Always explicitly write document end? ///Always explicitly write document end?
@property void explicitEnd(in bool explicit) @property void explicitEnd(bool explicit)
{ {
explicitEnd_ = explicit; explicitEnd_ = explicit;
} }
///Specify YAML version string. "1.1" by default. ///Specify YAML version string. "1.1" by default.
@property void YAMLVersion(in string YAMLVersion) @property void YAMLVersion(string YAMLVersion)
{ {
YAMLVersion_ = YAMLVersion; YAMLVersion_ = YAMLVersion;
} }
@ -301,16 +297,16 @@ struct Dumper
*/ */
@property void tagDirectives(string[string] tags) @property void tagDirectives(string[string] tags)
{ {
tagDirective[] t; TagDirective[] t;
foreach(handle, prefix; tags) foreach(handle, prefix; tags)
{ {
assert(handle.length >= 1 && handle[0] == '!' && handle[$ - 1] == '!', assert(handle.length >= 1 && handle[0] == '!' && handle[$ - 1] == '!',
"A tag handle is empty or does not start and end with a " "A tag handle is empty or does not start and end with a "
"'!' character : " ~ handle); "'!' character : " ~ handle);
assert(prefix.length >= 1, "A tag prefix is empty"); assert(prefix.length >= 1, "A tag prefix is empty");
t ~= tagDirective(handle, prefix); t ~= TagDirective(handle, prefix);
} }
tags_ = TagDirectives(t); tags_ = t;
} }
/** /**
@ -352,7 +348,7 @@ struct Dumper
* *
* Throws: YAMLException if unable to emit. * Throws: YAMLException if unable to emit.
*/ */
void emit(in Event[] events) void emit(Event[] events)
{ {
try try
{ {

@ -71,11 +71,11 @@ private mixin FastCharSearch!"\n\u0085\u2028\u2029"d newlineSearch_;
struct Emitter struct Emitter
{ {
private: private:
alias dyaml.tagdirectives.tagDirective tagDirective; alias dyaml.tagdirective.TagDirective TagDirective;
///Default tag handle shortcuts and replacements. ///Default tag handle shortcuts and replacements.
static tagDirective[] defaultTagDirectives_ = static TagDirective[] defaultTagDirectives_ =
[tagDirective("!", "!"), tagDirective("!!", "tag:yaml.org,2002:")]; [TagDirective("!", "!"), TagDirective("!!", "tag:yaml.org,2002:")];
///Stream to write to. ///Stream to write to.
Stream stream_; Stream stream_;
@ -135,7 +135,7 @@ struct Emitter
LineBreak bestLineBreak_; LineBreak bestLineBreak_;
///Tag directive handle - prefix pairs. ///Tag directive handle - prefix pairs.
tagDirective[] tagDirectives_; TagDirective[] tagDirectives_;
///Anchor/alias to process. ///Anchor/alias to process.
string preparedAnchor_ = null; string preparedAnchor_ = null;
@ -193,7 +193,7 @@ struct Emitter
} }
///Emit an event. Throws EmitterException on error. ///Emit an event. Throws EmitterException on error.
void emit(immutable Event event) void emit(Event event)
{ {
events_.push(event); events_.push(event);
while(!needMoreEvents()) while(!needMoreEvents())
@ -254,7 +254,7 @@ struct Emitter
{ {
if(events_.length == 0){return true;} if(events_.length == 0){return true;}
immutable event = events_.peek(); immutable event = cast(immutable Event)events_.peek();
if(event.id == EventID.DocumentStart){return needEvents(1);} if(event.id == EventID.DocumentStart){return needEvents(1);}
if(event.id == EventID.SequenceStart){return needEvents(2);} if(event.id == EventID.SequenceStart){return needEvents(2);}
if(event.id == EventID.MappingStart) {return needEvents(3);} if(event.id == EventID.MappingStart) {return needEvents(3);}
@ -274,7 +274,7 @@ struct Emitter
events_.next(); events_.next();
while(!events_.iterationOver()) while(!events_.iterationOver())
{ {
immutable event = events_.next(); immutable event = cast(immutable Event)events_.next();
static starts = [EventID.DocumentStart, EventID.SequenceStart, EventID.MappingStart]; static starts = [EventID.DocumentStart, EventID.SequenceStart, EventID.MappingStart];
static ends = [EventID.DocumentEnd, EventID.SequenceEnd, EventID.MappingEnd]; static ends = [EventID.DocumentEnd, EventID.SequenceEnd, EventID.MappingEnd];
if(starts.canFind(event.id)) {++level;} if(starts.canFind(event.id)) {++level;}
@ -347,8 +347,8 @@ struct Emitter
if(event_.id == EventID.DocumentStart) if(event_.id == EventID.DocumentStart)
{ {
const YAMLVersion = event_.value; const YAMLVersion = event_.value;
const tagDirectives = event_.tagDirectives; auto tagDirectives = event_.tagDirectives;
if(openEnded_ && (YAMLVersion !is null || !tagDirectives.isNull())) if(openEnded_ && (YAMLVersion !is null || tagDirectives !is null))
{ {
writeIndicator("...", true); writeIndicator("...", true);
writeIndent(); writeIndent();
@ -359,10 +359,10 @@ struct Emitter
writeVersionDirective(prepareVersion(YAMLVersion)); writeVersionDirective(prepareVersion(YAMLVersion));
} }
if(!tagDirectives.isNull()) if(tagDirectives !is null)
{ {
tagDirectives_ = tagDirectives.get; tagDirectives_ = tagDirectives;
sort!"icmp(a[0], b[0]) < 0"(tagDirectives_); sort!"icmp(a.handle, b.handle) < 0"(tagDirectives_);
foreach(ref pair; tagDirectives_) foreach(ref pair; tagDirectives_)
{ {
@ -371,7 +371,7 @@ struct Emitter
} }
} }
bool eq(ref tagDirective a, ref tagDirective b){return a.handle == b.handle;} bool eq(ref TagDirective a, ref TagDirective b){return a.handle == b.handle;}
//Add any default tag directives that have not been overriden. //Add any default tag directives that have not been overriden.
foreach(ref def; defaultTagDirectives_) foreach(ref def; defaultTagDirectives_)
{ {
@ -382,7 +382,7 @@ struct Emitter
} }
const implicit = first && !event_.explicitDocument && !canonical_ && const implicit = first && !event_.explicitDocument && !canonical_ &&
YAMLVersion is null && tagDirectives.isNull() && YAMLVersion is null && tagDirectives is null &&
!checkEmptyDocument(); !checkEmptyDocument();
if(!implicit) if(!implicit)
{ {
@ -684,7 +684,7 @@ struct Emitter
return false; return false;
} }
immutable event = events_.peek(); immutable event = cast(immutable Event)events_.peek();
const emptyScalar = event.id == EventID.Scalar && event.anchor.isNull() && const emptyScalar = event.id == EventID.Scalar && event.anchor.isNull() &&
event.tag.isNull() && event.implicit && event.value == ""; event.tag.isNull() && event.implicit && event.value == "";
return emptyScalar; return emptyScalar;
@ -933,14 +933,14 @@ struct Emitter
string suffix = tagString; string suffix = tagString;
//Sort lexicographically by prefix. //Sort lexicographically by prefix.
sort!"icmp(a[1], b[1]) < 0"(tagDirectives_); sort!"icmp(a.prefix, b.prefix) < 0"(tagDirectives_);
foreach(ref pair; tagDirectives_) foreach(ref pair; tagDirectives_)
{ {
auto prefix = pair[1]; auto prefix = pair.prefix;
if(tagString.startsWith(prefix) && if(tagString.startsWith(prefix) &&
(prefix != "!" || prefix.length < tagString.length)) (prefix != "!" || prefix.length < tagString.length))
{ {
handle = pair[0]; handle = pair.handle;
suffix = tagString[prefix.length .. $]; suffix = tagString[prefix.length .. $];
} }
} }

@ -19,7 +19,7 @@ import dyaml.encoding;
import dyaml.exception; import dyaml.exception;
import dyaml.reader; import dyaml.reader;
import dyaml.tag; import dyaml.tag;
import dyaml.tagdirectives; import dyaml.tagdirective;
import dyaml.style; import dyaml.style;
@ -55,30 +55,40 @@ struct Event
Mark startMark; Mark startMark;
///End position of the event in file/stream. ///End position of the event in file/stream.
Mark endMark; Mark endMark;
union
{
struct
{
///Anchor of the event, if any. ///Anchor of the event, if any.
Anchor anchor; Anchor anchor;
///Tag of the event, if any. ///Tag of the event, if any.
Tag tag; Tag tag;
}
///Tag directives, if this is a DocumentStart.
//TagDirectives tagDirectives;
TagDirective[] tagDirectives;
}
///Event type. ///Event type.
EventID id = EventID.Invalid; EventID id = EventID.Invalid;
///Style of scalar event, if this is a scalar event. ///Style of scalar event, if this is a scalar event.
ScalarStyle scalarStyle; ScalarStyle scalarStyle = ScalarStyle.Invalid;
union
{
///Should the tag be implicitly resolved? ///Should the tag be implicitly resolved?
bool implicit; bool implicit;
///TODO figure this out - Unknown, used by PyYAML with Scalar events.
bool implicit_2;
/** /**
* Is this document event explicit? * Is this document event explicit?
* *
* Used if this is a DocumentStart or DocumentEnd. * Used if this is a DocumentStart or DocumentEnd.
*/ */
alias implicit explicitDocument; bool explicitDocument;
///Tag directives, if this is a DocumentStart. }
TagDirectives tagDirectives; ///TODO figure this out - Unknown, used by PyYAML with Scalar events.
bool implicit_2;
///Encoding of the stream, if this is a StreamStart. ///Encoding of the stream, if this is a StreamStart.
Encoding encoding; Encoding encoding;
///Collection style, if this is a SequenceStart or MappingStart. ///Collection style, if this is a SequenceStart or MappingStart.
CollectionStyle collectionStyle; CollectionStyle collectionStyle = CollectionStyle.Invalid;
///Is this a null (uninitialized) event? ///Is this a null (uninitialized) event?
@property bool isNull() const {return id == EventID.Invalid;} @property bool isNull() const {return id == EventID.Invalid;}
@ -96,7 +106,12 @@ struct Event
*/ */
Event event(EventID id)(in Mark start, in Mark end, in Anchor anchor = Anchor()) pure Event event(EventID id)(in Mark start, in Mark end, in Anchor anchor = Anchor()) pure
{ {
return Event(null, start, end, anchor, Tag(), id); Event result;
result.startMark = start;
result.endMark = end;
result.anchor = anchor;
result.id = id;
return result;
} }
/** /**
@ -114,8 +129,15 @@ Event collectionStartEvent(EventID id)(in Mark start, in Mark end, in Anchor anc
{ {
static assert(id == EventID.SequenceStart || id == EventID.SequenceEnd || static assert(id == EventID.SequenceStart || id == EventID.SequenceEnd ||
id == EventID.MappingStart || id == EventID.MappingEnd); id == EventID.MappingStart || id == EventID.MappingEnd);
return Event(null, start, end, anchor, tag, id, ScalarStyle.Invalid, implicit, Event result;
false, TagDirectives(), Encoding.UTF_8, style); result.startMark = start;
result.endMark = end;
result.anchor = anchor;
result.tag = tag;
result.id = id;
result.implicit = implicit;
result.collectionStyle = style;
return result;
} }
/** /**
@ -127,8 +149,12 @@ Event collectionStartEvent(EventID id)(in Mark start, in Mark end, in Anchor anc
*/ */
Event streamStartEvent(in Mark start, in Mark end, in Encoding encoding) pure Event streamStartEvent(in Mark start, in Mark end, in Encoding encoding) pure
{ {
return Event(null, start, end, Anchor(), Tag(), EventID.StreamStart, Event result;
ScalarStyle.Invalid, false, false, TagDirectives(), encoding); result.startMark = start;
result.endMark = end;
result.id = EventID.StreamStart;
result.encoding = encoding;
return result;
} }
///Aliases for simple events. ///Aliases for simple events.
@ -151,10 +177,16 @@ alias collectionStartEvent!(EventID.MappingStart) mappingStartEvent;
* tagDirectives = Tag directives of the document. * tagDirectives = Tag directives of the document.
*/ */
Event documentStartEvent(in Mark start, in Mark end, bool explicit, string YAMLVersion, Event documentStartEvent(in Mark start, in Mark end, bool explicit, string YAMLVersion,
in TagDirectives tagDirectives) pure TagDirective[] tagDirectives) pure
{ {
return Event(YAMLVersion, start, end, Anchor(), Tag(), EventID.DocumentStart, Event result;
ScalarStyle.Invalid, explicit, false, tagDirectives); result.value = YAMLVersion;
result.startMark = start;
result.endMark = end;
result.id = EventID.DocumentStart;
result.explicitDocument = explicit;
result.tagDirectives = tagDirectives;
return result;
} }
/** /**
@ -166,8 +198,12 @@ Event documentStartEvent(in Mark start, in Mark end, bool explicit, string YAMLV
*/ */
Event documentEndEvent(in Mark start, in Mark end, bool explicit) pure Event documentEndEvent(in Mark start, in Mark end, bool explicit) pure
{ {
return Event(null, start, end, Anchor(), Tag(), EventID.DocumentEnd, Event result;
ScalarStyle.Invalid, explicit); result.startMark = start;
result.endMark = end;
result.id = EventID.DocumentEnd;
result.explicitDocument = explicit;
return result;
} }
/** /**
@ -185,6 +221,15 @@ Event scalarEvent(in Mark start, in Mark end, in Anchor anchor, in Tag tag,
in Tuple!(bool, bool) implicit, in string value, in Tuple!(bool, bool) implicit, in string value,
in ScalarStyle style = ScalarStyle.Invalid) pure in ScalarStyle style = ScalarStyle.Invalid) pure
{ {
return Event(value, start, end, anchor, tag, EventID.Scalar, style, implicit[0], Event result;
implicit[1]); result.value = value;
result.startMark = start;
result.endMark = end;
result.anchor = anchor;
result.tag = tag;
result.id = EventID.Scalar;
result.scalarStyle = style;
result.implicit = implicit[0];
result.implicit_2 = implicit[1];
return result;
} }

@ -13,7 +13,6 @@ module dyaml.loader;
import std.exception; import std.exception;
import std.stream; import std.stream;
import dyaml.anchor;
import dyaml.composer; import dyaml.composer;
import dyaml.constructor; import dyaml.constructor;
import dyaml.event; import dyaml.event;
@ -23,7 +22,6 @@ import dyaml.parser;
import dyaml.reader; import dyaml.reader;
import dyaml.resolver; import dyaml.resolver;
import dyaml.scanner; import dyaml.scanner;
import dyaml.tagdirectives;
import dyaml.token; import dyaml.token;
@ -114,6 +112,8 @@ struct Loader
Constructor constructor_; Constructor constructor_;
///Name of the input file or stream, used in error messages. ///Name of the input file or stream, used in error messages.
string name_ = "<unknown>"; string name_ = "<unknown>";
///Are we done loading?
bool done_ = false;
public: public:
@disable this(); @disable this();
@ -127,7 +127,7 @@ struct Loader
* *
* Throws: YAMLException if the file could not be opened or read. * Throws: YAMLException if the file could not be opened or read.
*/ */
this(in string filename) this(string filename)
{ {
name_ = filename; name_ = filename;
try{this(new File(filename));} try{this(new File(filename));}
@ -154,8 +154,6 @@ struct Loader
parser_ = new Parser(scanner_); parser_ = new Parser(scanner_);
resolver_ = defaultResolver_; resolver_ = defaultResolver_;
constructor_ = defaultConstructor_; constructor_ = defaultConstructor_;
Anchor.addReference();
TagDirectives.addReference();
} }
catch(YAMLException e) catch(YAMLException e)
{ {
@ -167,8 +165,6 @@ struct Loader
///Destroy the Loader. ///Destroy the Loader.
~this() ~this()
{ {
Anchor.removeReference();
TagDirectives.removeReference();
clear(reader_); clear(reader_);
clear(scanner_); clear(scanner_);
clear(parser_); clear(parser_);
@ -197,15 +193,23 @@ struct Loader
* *
* If none or more than one YAML document is found, this throws a YAMLException. * If none or more than one YAML document is found, this throws a YAMLException.
* *
* This can only be called once; this is enforced by contract.
*
* Returns: Root node of the document. * Returns: Root node of the document.
* *
* Throws: YAMLException if there wasn't exactly one document * Throws: YAMLException if there wasn't exactly one document
* or on a YAML parsing error. * or on a YAML parsing error.
*/ */
Node load() Node load()
in
{
assert(!done_, "Loader: Trying to load YAML twice");
}
body
{ {
try try
{ {
scope(exit){done_ = true;}
auto composer = new Composer(parser_, resolver_, constructor_); auto composer = new Composer(parser_, resolver_, constructor_);
enforce(composer.checkNode(), new YAMLException("No YAML document to load")); enforce(composer.checkNode(), new YAMLException("No YAML document to load"));
return composer.getSingleNode(); return composer.getSingleNode();
@ -225,6 +229,8 @@ struct Loader
* vice versa will not return any documents, as they have all been parsed * vice versa will not return any documents, as they have all been parsed
* already. * already.
* *
* This can only be called once; this is enforced by contract.
*
* Returns: Array of root nodes of all documents in the file/stream. * Returns: Array of root nodes of all documents in the file/stream.
* *
* Throws: YAMLException on a parsing error. * Throws: YAMLException on a parsing error.
@ -241,10 +247,18 @@ struct Loader
* *
* Parses documents lazily, when they are needed. * Parses documents lazily, when they are needed.
* *
* Foreach over a Loader can only be used once; this is enforced by contract.
*
* Throws: YAMLException on a parsing error. * Throws: YAMLException on a parsing error.
*/ */
int opApply(int delegate(ref Node) dg) int opApply(int delegate(ref Node) dg)
in
{ {
assert(!done_, "Loader: Trying to load YAML twice");
}
body
{
scope(exit){done_ = true;}
try try
{ {
auto composer = new Composer(parser_, resolver_, constructor_); auto composer = new Composer(parser_, resolver_, constructor_);
@ -284,11 +298,11 @@ struct Loader
} }
//Parse and return all events. Used for debugging. //Parse and return all events. Used for debugging.
Event[] parse() immutable(Event)[] parse()
{ {
try try
{ {
Event[] result; immutable(Event)[] result;
while(parser_.checkEvent()){result ~= parser_.getEvent();} while(parser_.checkEvent()){result ~= parser_.getEvent();}
return result; return result;
} }

@ -24,7 +24,7 @@ import dyaml.scanner;
import dyaml.style; import dyaml.style;
import dyaml.token; import dyaml.token;
import dyaml.tag; import dyaml.tag;
import dyaml.tagdirectives; import dyaml.tagdirective;
package: package:
@ -108,10 +108,10 @@ final class Parser
{ {
private: private:
///Default tag handle shortcuts and replacements. ///Default tag handle shortcuts and replacements.
static tagDirective[] defaultTagDirectives_; static TagDirective[] defaultTagDirectives_;
static this() static this()
{ {
defaultTagDirectives_ = [tagDirective("!", "!"), tagDirective("!!", "tag:yaml.org,2002:")]; defaultTagDirectives_ = [TagDirective("!", "!"), TagDirective("!!", "tag:yaml.org,2002:")];
} }
///Scanner providing YAML tokens. ///Scanner providing YAML tokens.
@ -123,7 +123,7 @@ final class Parser
///YAML version string. ///YAML version string.
string YAMLVersion_ = null; string YAMLVersion_ = null;
///Tag handle shortcuts and replacements. ///Tag handle shortcuts and replacements.
tagDirective[] tagDirectives_; TagDirective[] tagDirectives_;
///Stack of states. ///Stack of states.
Array!(Event delegate()) states_; Array!(Event delegate()) states_;
@ -193,13 +193,13 @@ final class Parser
* *
* Must not be called if there are no events left. * Must not be called if there are no events left.
*/ */
Event peekEvent() immutable(Event) peekEvent()
{ {
if(currentEvent_.isNull && state_ !is null) if(currentEvent_.isNull && state_ !is null)
{ {
currentEvent_ = state_(); currentEvent_ = state_();
} }
if(!currentEvent_.isNull){return currentEvent_;} if(!currentEvent_.isNull){return cast(immutable Event)currentEvent_;}
assert(false, "No event left to peek"); assert(false, "No event left to peek");
} }
@ -208,7 +208,7 @@ final class Parser
* *
* Must not be called if there are no events left. * Must not be called if there are no events left.
*/ */
Event getEvent() immutable(Event) getEvent()
{ {
//Get the next event and proceed further. //Get the next event and proceed further.
if(currentEvent_.isNull && state_ !is null) if(currentEvent_.isNull && state_ !is null)
@ -218,7 +218,7 @@ final class Parser
if(!currentEvent_.isNull) if(!currentEvent_.isNull)
{ {
immutable Event result = currentEvent_; immutable Event result = cast(immutable Event)currentEvent_;
currentEvent_.id = EventID.Invalid; currentEvent_.id = EventID.Invalid;
return result; return result;
} }
@ -273,7 +273,7 @@ final class Parser
states_ ~= &parseDocumentEnd; states_ ~= &parseDocumentEnd;
state_ = &parseBlockNode; state_ = &parseBlockNode;
return documentStartEvent(token.startMark, token.endMark, false, null, TagDirectives()); return documentStartEvent(token.startMark, token.endMark, false, null, null);
} }
return parseDocumentStart(); return parseDocumentStart();
} }
@ -336,7 +336,7 @@ final class Parser
} }
///Process directives at the beginning of a document. ///Process directives at the beginning of a document.
TagDirectives processDirectives() TagDirective[] processDirectives()
{ {
//Destroy version and tag handles from previous document. //Destroy version and tag handles from previous document.
YAMLVersion_ = null; YAMLVersion_ = null;
@ -367,21 +367,21 @@ final class Parser
foreach(ref pair; tagDirectives_) foreach(ref pair; tagDirectives_)
{ {
//handle //handle
const h = pair[0]; const h = pair.handle;
enforce(h != handle, new Error("Duplicate tag handle: " ~ handle, enforce(h != handle, new Error("Duplicate tag handle: " ~ handle,
token.startMark)); token.startMark));
} }
tagDirectives_ ~= tagDirective(handle, parts[2]); tagDirectives_ ~= TagDirective(handle, parts[2]);
} }
} }
TagDirectives value = tagDirectives_.length == 0 ? TagDirectives() : TagDirectives(tagDirectives_); TagDirective[] value = tagDirectives_;
//Add any default tag handles that haven't been overridden. //Add any default tag handles that haven't been overridden.
foreach(ref defaultPair; defaultTagDirectives_) foreach(ref defaultPair; defaultTagDirectives_)
{ {
bool found = false; bool found = false;
foreach(ref pair; tagDirectives_) if(defaultPair[0] == pair[0]) foreach(ref pair; tagDirectives_) if(defaultPair.handle == pair.handle)
{ {
found = true; found = true;
break; break;
@ -540,10 +540,9 @@ final class Parser
string replacement = null; string replacement = null;
foreach(ref pair; tagDirectives_) foreach(ref pair; tagDirectives_)
{ {
//pair[0] is handle, pair[1] replacement. if(pair.handle == handle)
if(pair[0] == handle)
{ {
replacement = pair[1]; replacement = pair.prefix;
break; break;
} }
} }

@ -37,7 +37,7 @@ struct Queue(T)
///Linked list node containing one element and pointer to the next node. ///Linked list node containing one element and pointer to the next node.
struct Node struct Node
{ {
T payload_ = T.init; T payload_;
Node* next_ = null; Node* next_ = null;
} }
@ -90,7 +90,7 @@ struct Queue(T)
} }
///Push new item to the queue. ///Push new item to the queue.
void push(in T item) void push(T item)
{ {
Node* newLast = allocate!Node(item, cast(Node*)null); Node* newLast = allocate!Node(item, cast(Node*)null);
if(last_ !is null){last_.next_ = newLast;} if(last_ !is null){last_.next_ = newLast;}
@ -100,7 +100,7 @@ struct Queue(T)
} }
///Insert a new item putting it to specified index in the linked list. ///Insert a new item putting it to specified index in the linked list.
void insert(in T item, in size_t idx) void insert(T item, in size_t idx)
in in
{ {
assert(idx <= length_); assert(idx <= length_);
@ -155,7 +155,7 @@ struct Queue(T)
} }
///Return the next element in the queue. ///Return the next element in the queue.
ref const(T) peek() const ref inout(T) peek() inout
in in
{ {
assert(!empty, "Trying to peek at an element in an empty queue"); assert(!empty, "Trying to peek at an element in an empty queue");

@ -7,7 +7,9 @@
module dyaml.reader; module dyaml.reader;
import core.stdc.stdlib;
import core.stdc.string; import core.stdc.string;
import core.thread;
import std.algorithm; import std.algorithm;
import std.conv; import std.conv;
@ -34,47 +36,30 @@ class ReaderException : YAMLException
} }
} }
///Reads data from a stream and converts it to UTF-32 (dchar) data. ///Lazily reads and decodes data from stream, only storing as much as needed at any moment.
final class Reader final class Reader
{ {
private: private:
///Input stream. //Input stream.
EndianStream stream_; EndianStream stream_;
///Allocated space for buffer_. //Allocated space for buffer_.
dchar[] bufferAllocated_; dchar[] bufferAllocated_ = null;
///Buffer of currently loaded characters. //Buffer of currently loaded characters.
dchar[] buffer_; dchar[] buffer_ = null;
///Current position within buffer. Only data after this position can be read. //Current position within buffer. Only data after this position can be read.
uint bufferOffset_ = 0; uint bufferOffset_ = 0;
///Index of the current character in the stream. //Index of the current character in the stream.
size_t charIndex_ = 0; size_t charIndex_ = 0;
///Encoding of the input stream. //Current line in file.
Encoding encoding_;
///Current line in file.
uint line_; uint line_;
///Current column in file. //Current column in file.
uint column_; uint column_;
///Number of bytes still available (not read) in the stream. //Decoder reading data from file and decoding it to UTF-32.
size_t available_; UTFFastDecoder decoder_;
///Capacity of raw buffers.
static immutable bufferLength8_ = 8;
///Capacity of raw buffers.
static immutable bufferLength16_ = bufferLength8_ / 2;
union
{
///Buffer to hold UTF-8 data before decoding.
char[bufferLength8_ + 1] rawBuffer8_;
///Buffer to hold UTF-16 data before decoding.
wchar[bufferLength16_ + 1] rawBuffer16_;
}
///Number of elements held in the used raw buffer.
uint rawUsed_ = 0;
public: public:
/** /*
* Construct a Reader. * Construct an AbstractReader.
* *
* Params: stream = Input stream. Must be readable and seekable. * Params: stream = Input stream. Must be readable and seekable.
* *
@ -89,51 +74,14 @@ final class Reader
body body
{ {
stream_ = new EndianStream(stream); stream_ = new EndianStream(stream);
available_ = stream_.available; decoder_ = UTFFastDecoder(stream_);
//handle files short enough not to have a BOM
if(available_ < 2)
{
encoding_ = Encoding.UTF_8;
return;
} }
//readBOM will determine and set stream endianness
switch(stream_.readBOM(2))
{
case -1:
//readBOM() eats two more bytes in this case so get them back
const wchar bytes = stream_.getcw();
rawBuffer8_[0] = cast(char)(bytes % 256);
rawBuffer8_[1] = cast(char)(bytes / 256);
rawUsed_ = 2;
goto case 0;
case 0: encoding_ = Encoding.UTF_8; break;
case 1, 2:
//readBOM() eats two more bytes in this case so get them back
encoding_ = Encoding.UTF_16;
rawBuffer16_[0] = stream_.getcw();
rawUsed_ = 1;
enforce(available_ % 2 == 0,
new ReaderException("Odd byte count in an UTF-16 stream"));
break;
case 3, 4:
enforce(available_ % 4 == 0,
new ReaderException("Byte count in an UTF-32 stream not divisible by 4"));
encoding_ = Encoding.UTF_32;
break;
default: assert(false, "Unknown UTF BOM");
}
available_ = stream_.available;
auto ptr = cast(dchar*)core.stdc.stdlib.malloc(dchar.sizeof * 256);
bufferAllocated_ = ptr[0 .. 256];
}
///Destroy the Reader.
~this() ~this()
{ {
core.stdc.stdlib.free(bufferAllocated_.ptr); //Delete the buffer, if allocated.
if(bufferAllocated_ is null){return;}
free(bufferAllocated_.ptr);
buffer_ = bufferAllocated_ = null; buffer_ = bufferAllocated_ = null;
} }
@ -148,12 +96,13 @@ final class Reader
* Throws: ReaderException if trying to read past the end of the stream * Throws: ReaderException if trying to read past the end of the stream
* or if invalid data is read. * or if invalid data is read.
*/ */
dchar peek(in size_t index = 0) dchar peek(size_t index = 0)
{ {
if(buffer_.length <= bufferOffset_ + index + 1) if(buffer_.length < bufferOffset_ + index + 1)
{ {
updateBuffer(index + 1); updateBuffer(index + 1);
} }
if(buffer_.length <= bufferOffset_ + index) if(buffer_.length <= bufferOffset_ + index)
{ {
throw new ReaderException("Trying to read past the end of the stream"); throw new ReaderException("Trying to read past the end of the stream");
@ -172,7 +121,7 @@ final class Reader
* *
* Returns: Characters starting at current position or an empty slice if out of bounds. * Returns: Characters starting at current position or an empty slice if out of bounds.
*/ */
const(dstring) prefix(in size_t length) const(dstring) prefix(size_t length)
{ {
return slice(0, length); return slice(0, length);
} }
@ -194,12 +143,12 @@ final class Reader
{ {
updateBuffer(end); updateBuffer(end);
} }
end += bufferOffset_; end += bufferOffset_;
start += bufferOffset_; start += bufferOffset_;
end = min(buffer_.length, end); end = min(buffer_.length, end);
if(end <= start){return "";}
return cast(dstring)buffer_[start .. end]; return end > start ? cast(dstring)buffer_[start .. end] : "";
} }
/** /**
@ -227,7 +176,7 @@ final class Reader
* Throws: ReaderException if trying to read past the end of the stream * Throws: ReaderException if trying to read past the end of the stream
* or if invalid data is read. * or if invalid data is read.
*/ */
dstring get(in size_t length) dstring get(size_t length)
{ {
auto result = prefix(length).dup; auto result = prefix(length).dup;
forward(length); forward(length);
@ -244,13 +193,13 @@ final class Reader
*/ */
void forward(size_t length = 1) void forward(size_t length = 1)
{ {
mixin FastCharSearch!"\n\u0085\u2028\u2029"d search;
if(buffer_.length <= bufferOffset_ + length + 1) if(buffer_.length <= bufferOffset_ + length + 1)
{ {
updateBuffer(length + 1); updateBuffer(length + 1);
} }
mixin FastCharSearch!"\n\u0085\u2028\u2029"d search;
while(length > 0) while(length > 0)
{ {
const c = buffer_[bufferOffset_]; const c = buffer_[bufferOffset_];
@ -268,19 +217,19 @@ final class Reader
} }
///Get a string describing current stream position, used for error messages. ///Get a string describing current stream position, used for error messages.
@property Mark mark() const {return Mark(line_, column_);} @property final Mark mark() const {return Mark(line_, column_);}
///Get current line number. ///Get current line number.
@property uint line() const {return line_;} @property final uint line() const {return line_;}
///Get current line number. ///Get current column number.
@property uint column() const {return column_;} @property final uint column() const {return column_;}
///Get index of the current character in the stream. ///Get index of the current character in the stream.
@property size_t charIndex() const {return charIndex_;} @property final size_t charIndex() const {return charIndex_;}
///Get encoding of the input stream. ///Get encoding of the input stream.
@property Encoding encoding() const {return encoding_;} @property final Encoding encoding() const {return decoder_.encoding;}
private: private:
/** /**
@ -296,7 +245,7 @@ final class Reader
*/ */
void updateBuffer(in size_t length) void updateBuffer(in size_t length)
{ {
//get rid of unneeded data in the buffer //Get rid of unneeded data in the buffer.
if(bufferOffset_ > 0) if(bufferOffset_ > 0)
{ {
size_t bufferLength = buffer_.length - bufferOffset_; size_t bufferLength = buffer_.length - bufferOffset_;
@ -306,12 +255,12 @@ final class Reader
bufferOffset_ = 0; bufferOffset_ = 0;
} }
////Load chars in batches of at most 1024 bytes (256 chars) //Load chars in batches of at most 1024 bytes (256 chars)
while(buffer_.length <= bufferOffset_ + length) while(buffer_.length <= bufferOffset_ + length)
{ {
loadChars(256); loadChars(512);
if(done) if(decoder_.done)
{ {
if(buffer_.length == 0 || buffer_[$ - 1] != '\0') if(buffer_.length == 0 || buffer_[$ - 1] != '\0')
{ {
@ -325,9 +274,11 @@ final class Reader
} }
/** /**
* Load at most specified number of characters. * Load more characters to the buffer.
* *
* Params: chars = Maximum number of characters to load. * Params: chars = Recommended number of characters to load.
* More characters might be loaded.
* Less will be loaded if not enough available.
* *
* Throws: ReaderException on Unicode decoding error, * Throws: ReaderException on Unicode decoding error,
* if nonprintable characters are detected, or * if nonprintable characters are detected, or
@ -335,96 +286,35 @@ final class Reader
*/ */
void loadChars(size_t chars) void loadChars(size_t chars)
{ {
///Get next character from the stream.
dchar getDChar()
{
final switch(encoding_)
{
case Encoding.UTF_8:
//Temp buffer for moving data in rawBuffer8_.
char[bufferLength8_] temp;
//Shortcut for ASCII.
if(rawUsed_ > 0 && rawBuffer8_[0] < 128)
{
//Get the first byte (one char in ASCII).
const dchar result = rawBuffer8_[0];
--rawUsed_;
//Move the data.
*(cast(ulong*)temp.ptr) = *(cast(ulong*)(rawBuffer8_.ptr + 1));
*(cast(ulong*)rawBuffer8_.ptr) = *(cast(ulong*)temp.ptr);
return result;
}
//Bytes to read.
const readBytes = min(available_, bufferLength8_ - rawUsed_);
available_ -= readBytes;
//Length of data in rawBuffer8_ after reading.
const len = rawUsed_ + readBytes;
//Read the data.
stream_.readExact(rawBuffer8_.ptr + rawUsed_, readBytes);
//After decoding, this will point to the first byte not decoded.
size_t idx = 0;
const dchar result = decode(rawBuffer8_, idx);
rawUsed_ = cast(uint)(len - idx);
//Move the data.
temp[0 .. rawUsed_] = rawBuffer8_[idx .. len];
rawBuffer8_[0 .. rawUsed_] = temp[0 .. rawUsed_];
return result;
case Encoding.UTF_16:
//Temp buffer for moving data in rawBuffer8_.
wchar[bufferLength16_] temp;
//Words to read.
size_t readWords = min(available_ / 2, bufferLength16_ - rawUsed_);
available_ -= readWords * 2;
//Length of data in rawBuffer16_ after reading.
size_t len = rawUsed_;
//Read the data.
while(readWords > 0)
{
//Due to a bug in std.stream, we have to use getcw here.
rawBuffer16_[len] = stream_.getcw();
--readWords;
++len;
}
//After decoding, this will point to the first word not decoded.
size_t idx = 0;
const dchar result = decode(rawBuffer16_, idx);
rawUsed_ = cast(uint)(len - idx);
//Move the data.
temp[0 .. rawUsed_] = rawBuffer16_[idx .. len];
rawBuffer16_[0 .. rawUsed_] = temp[0 .. rawUsed_];
return result;
case Encoding.UTF_32:
dchar result;
available_ -= 4;
stream_.read(result);
return result;
}
}
const oldLength = buffer_.length; const oldLength = buffer_.length;
const oldPosition = stream_.position; const oldPosition = stream_.position;
//Preallocating memory to limit GC reallocations.
bufferReserve(buffer_.length + chars); bufferReserve(buffer_.length + chars);
buffer_ = bufferAllocated_[0 .. buffer_.length + chars]; buffer_ = bufferAllocated_[0 .. buffer_.length + chars];
scope(exit) scope(success)
{ {
buffer_ = buffer_[0 .. $ - chars]; buffer_ = buffer_[0 .. $ - chars];
enforce(printable(buffer_[oldLength .. $]), enforce(printable(buffer_[oldLength .. $]),
new ReaderException("Special unicode characters are not allowed")); new ReaderException("Special unicode characters are not allowed"));
} }
try for(uint c = 0; chars; --chars, ++c) try for(size_t c = 0; chars && !decoder_.done;)
{ {
if(done){break;} const slice = decoder_.getDChars(chars);
buffer_[oldLength + c] = getDChar(); buffer_[oldLength + c .. oldLength + c + slice.length] = slice;
c += slice.length;
chars -= slice.length;
} }
catch(Exception e)
{
handleLoadCharsException(e, oldPosition);
}
}
//Handle an exception thrown in loadChars method of any Reader.
void handleLoadCharsException(Exception e, size_t oldPosition)
{
try{throw e;}
catch(UtfException e) catch(UtfException e)
{ {
const position = stream_.position; const position = stream_.position;
@ -437,6 +327,298 @@ final class Reader
} }
} }
//Code shared by loadEntireFile methods.
void loadEntireFile_()
{
const maxChars = decoder_.maxChars;
bufferReserve(maxChars + 1);
loadChars(maxChars);
if(buffer_.length == 0 || buffer_[$ - 1] != '\0')
{
buffer_ = bufferAllocated_[0 .. buffer_.length + 1];
buffer_[$ - 1] = '\0';
}
}
//Ensure there is space for at least capacity characters in bufferAllocated_.
void bufferReserve(in size_t capacity)
{
if(bufferAllocated_ !is null && bufferAllocated_.length >= capacity){return;}
//Handle first allocation as well as reallocation.
auto ptr = bufferAllocated_ !is null
? realloc(bufferAllocated_.ptr, capacity * dchar.sizeof)
: malloc(capacity * dchar.sizeof);
bufferAllocated_ = (cast(dchar*)ptr)[0 .. capacity];
buffer_ = bufferAllocated_[0 .. buffer_.length];
}
}
private:
alias UTFBlockDecoder!512 UTFFastDecoder;
///Decodes streams to UTF-32 in blocks.
struct UTFBlockDecoder(size_t bufferSize_) if (bufferSize_ % 2 == 0)
{
private:
//UTF-8 codepoint strides (0xFF are codepoints that can't start a sequence).
static immutable ubyte[256] utf8Stride =
[
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
4,4,4,4,4,4,4,4,5,5,5,5,6,6,0xFF,0xFF,
];
//Encoding of the input stream.
Encoding encoding_;
//Maximum number of characters that might be in the stream.
size_t maxChars_;
//Bytes available in the stream.
size_t available_;
//Input stream.
EndianStream stream_;
//Buffer used to store raw UTF-8 or UTF-16 code points.
union
{
char[bufferSize_] rawBuffer8_;
wchar[bufferSize_ / 2] rawBuffer16_;
}
//Used space (in items) in rawBuffer8_/rawBuffer16_.
size_t rawUsed_;
//Space used by buffer_.
dchar[bufferSize_] bufferSpace_;
//Buffer of decoded, UTF-32 characters. This is a slice into bufferSpace_.
dchar[] buffer_;
public:
///Construct a UTFFastDecoder decoding a stream.
this(EndianStream stream)
{
stream_ = stream;
available_ = stream_.available;
//Handle files short enough not to have a BOM.
if(available_ < 2)
{
encoding_ = Encoding.UTF_8;
maxChars_ = 0;
if(available_ == 1)
{
bufferSpace_[0] = stream_.getc();
buffer_ = bufferSpace_[0 .. 1];
maxChars_ = 1;
}
return;
}
char[] rawBuffer8;
wchar[] rawBuffer16;
//readBOM will determine and set stream endianness.
switch(stream_.readBOM(2))
{
case -1:
//readBOM() eats two more bytes in this case so get them back.
const wchar bytes = stream_.getcw();
rawBuffer8_[0 .. 2] = [cast(ubyte)(bytes % 256), cast(ubyte)(bytes / 256)];
rawUsed_ = 2;
goto case 0;
case 0:
maxChars_ = available_;
encoding_ = Encoding.UTF_8;
break;
case 1, 2:
maxChars_ = available_ / 2;
//readBOM() eats two more bytes in this case so get them back.
encoding_ = Encoding.UTF_16;
rawBuffer16_[0] = stream_.getcw();
rawUsed_ = 1;
enforce(available_ % 2 == 0,
new ReaderException("Odd byte count in an UTF-16 stream"));
break;
case 3, 4:
maxChars_ = available_ / 4;
encoding_ = Encoding.UTF_32;
enforce(available_ % 4 == 0,
new ReaderException("Byte count in an UTF-32 stream not divisible by 4"));
break;
default: assert(false, "Unknown UTF BOM");
}
available_ = stream_.available;
}
///Get maximum number of characters that might be in the stream.
@property size_t maxChars() const {return maxChars_;}
///Get encoding we're decoding from.
@property Encoding encoding() const {return encoding_;}
///Are we done decoding?
@property bool done() const
{
return rawUsed_ == 0 && buffer_.length == 0 && available_ == 0;
}
///Get next character.
dchar getDChar()
{
if(buffer_.length)
{
const result = buffer_[0];
buffer_ = buffer_[1 .. $];
return result;
}
assert(available_ > 0 || rawUsed_ > 0);
updateBuffer();
return getDChar();
}
///Get as many characters as possible, but at most maxChars. Slice returned will be invalidated in further calls.
const(dchar[]) getDChars(size_t maxChars = size_t.max)
{
if(buffer_.length)
{
const slice = min(buffer_.length, maxChars);
const result = buffer_[0 .. slice];
buffer_ = buffer_[slice .. $];
return result;
}
assert(available_ > 0 || rawUsed_ > 0);
updateBuffer();
return getDChars(maxChars);
}
private:
//Read and decode characters from file and store them in the buffer.
void updateBuffer()
{
assert(buffer_.length == 0);
final switch(encoding_)
{
case Encoding.UTF_8:
const bytes = min(bufferSize_ - rawUsed_, available_);
//Current length of valid data in rawBuffer8_.
const rawLength = rawUsed_ + bytes;
stream_.readExact(rawBuffer8_.ptr + rawUsed_, bytes);
available_ -= bytes;
decodeRawBuffer(rawBuffer8_, rawLength);
break;
case Encoding.UTF_16:
const words = min((bufferSize_ / 2) - rawUsed_, available_ / 2);
//Current length of valid data in rawBuffer16_.
const rawLength = rawUsed_ + words;
foreach(c; rawUsed_ .. rawLength)
{
stream_.read(rawBuffer16_[c]);
available_ -= 2;
}
decodeRawBuffer(rawBuffer16_, rawLength);
break;
case Encoding.UTF_32:
const chars = min(bufferSize_ / 4, available_ / 4);
foreach(c; 0 .. chars)
{
stream_.read(bufferSpace_[c]);
available_ -= 4;
}
buffer_ = bufferSpace_[0 .. chars];
break;
}
}
//Decode contents of a UTF-8 or UTF-16 raw buffer.
void decodeRawBuffer(C)(C[] buffer, const size_t length)
{
            //End of the part of the raw buffer that contains
            //complete characters and can be decoded.
            const end = endOfLastUTFSequence(buffer, length);
            //If end is 0, there are no complete UTF sequences. This can happen
            //at the end of the file if there is an incomplete UTF-8/UTF-16 sequence.
            enforce(end > 0,
                    new ReaderException("Invalid UTF sequence at the end of stream"));
decodeUTF(buffer[0 .. end]);
//After decoding, any code points not decoded go to the start of raw buffer.
rawUsed_ = length - end;
foreach(i; 0 .. rawUsed_){buffer[i] = buffer[i + end];}
}
//Determine the end of last UTF-8 or UTF-16 sequence in a raw buffer.
size_t endOfLastUTFSequence(C)(const C[] buffer, const size_t max)
{
static if(is(C == char))
{
for(long end = max - 1; end >= 0; --end)
{
const s = utf8Stride[buffer[end]];
if(s != 0xFF)
{
//If stride goes beyond end of the buffer (max), return end.
//Otherwise the last sequence ends at max, so we can return that.
//(Unless there is an invalid code point, which is
//caught at decoding)
return (s > max - end) ? cast(size_t)end : max;
}
}
return 0;
}
else
{
size_t end = 0;
while(end < max)
{
const s = stride(buffer, end);
if(s + end > max){break;}
end += s;
}
return end;
}
}
//Decode a UTF-8 or UTF-16 buffer (with no incomplete sequences at the end).
void decodeUTF(C)(const C[] source)
{
size_t bufpos = 0;
const srclength = source.length;
for(size_t srcpos = 0; srcpos < srclength;)
{
const c = source[srcpos];
if(c < 0x80)
{
bufferSpace_[bufpos++] = c;
++srcpos;
}
else
{
bufferSpace_[bufpos++] = decode(source, srcpos);
}
}
buffer_ = bufferSpace_[0 .. bufpos];
}
}
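Note (not part of the diff): the decoder above is only ever driven through getDChar/getDChars until done becomes true. A minimal usage sketch follows, assuming this module's existing imports (std.stream's MemoryStream and EndianStream) and constructing the decoder with new; it is an illustration of the API, not code from the commit.

//Illustration only: drain a small BOM-less UTF-8 stream character by character.
unittest
{
    ubyte[] data = cast(ubyte[])"hello";
    auto decoder = new UTFFastDecoder(new EndianStream(new MemoryStream(data)));
    //No BOM, so the stream is treated as UTF-8.
    assert(decoder.encoding == Encoding.UTF_8);

    dchar[] decoded;
    while(!decoder.done)
    {
        decoded ~= decoder.getDChar();
    }
    assert(decoded == "hello"d);
}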
    /**
     * Determine if all characters in an array are printable.
     *
@@ -444,7 +626,7 @@ final class Reader
     *
     * Returns: True if all the characters are printable, false otherwise.
     */
-    static bool printable(const ref dchar[] chars) pure
+    bool printable(const ref dchar[] chars) pure
    {
        foreach(c; chars)
        {
@@ -459,33 +641,15 @@ final class Reader
        return true;
    }
-    ///Are we done reading?
-    @property bool done() const
-    {
-        return (available_ == 0 &&
-                ((encoding_ == Encoding.UTF_8 && rawUsed_ == 0) ||
-                 (encoding_ == Encoding.UTF_16 && rawUsed_ == 0) ||
-                 encoding_ == Encoding.UTF_32));
-    }
-    ///Ensure there is space for at least capacity characters in bufferAllocated_.
-    void bufferReserve(in size_t capacity)
-    {
-        if(bufferAllocated_.length >= capacity){return;}
-        auto newPtr = core.stdc.stdlib.realloc(bufferAllocated_.ptr,
-                                               capacity * dchar.sizeof);
-        bufferAllocated_ = (cast(dchar*)newPtr)[0 .. capacity];
-        buffer_ = bufferAllocated_[0 .. buffer_.length];
-    }
-    unittest
-    {
-        writeln("D:YAML reader endian unittest");
+//Unittests.
+void testEndian(R)()
+{
+    writeln(typeid(R).toString() ~ ": endian unittest");
        void endian_test(ubyte[] data, Encoding encoding_expected, Endian endian_expected)
        {
-            auto reader = new Reader(new MemoryStream(data));
-            assert(reader.encoding_ == encoding_expected);
+            Reader reader = new R(new MemoryStream(data));
+            assert(reader.encoding == encoding_expected);
            assert(reader.stream_.endian == endian_expected);
        }
        ubyte[] little_endian_utf_16 = [0xFF, 0xFE, 0x7A, 0x00];
@@ -493,11 +657,12 @@ final class Reader
        endian_test(little_endian_utf_16, Encoding.UTF_16, Endian.littleEndian);
        endian_test(big_endian_utf_16, Encoding.UTF_16, Endian.bigEndian);
    }
-    unittest
+void testPeekPrefixForward(R)()
    {
-        writeln("D:YAML reader peek/prefix/forward unittest");
+    writeln(typeid(R).toString() ~ ": peek/prefix/forward unittest");
        ubyte[] data = ByteOrderMarks[BOM.UTF8] ~ cast(ubyte[])"data";
-        auto reader = new Reader(new MemoryStream(data));
+    Reader reader = new R(new MemoryStream(data));
        assert(reader.peek() == 'd');
        assert(reader.peek(1) == 'a');
        assert(reader.peek(2) == 't');
@@ -509,15 +674,16 @@ final class Reader
        assert(reader.peek(1) == 'a');
        assert(collectException(reader.peek(3)));
    }
-    unittest
+void testUTF(R)()
    {
-        writeln("D:YAML reader UTF formats unittest");
+    writeln(typeid(R).toString() ~ ": UTF formats unittest");
        dchar[] data = cast(dchar[])"data";
        void utf_test(T)(T[] data, BOM bom)
        {
            ubyte[] bytes = ByteOrderMarks[bom] ~
                            (cast(ubyte*)data.ptr)[0 .. data.length * T.sizeof];
-            auto reader = new Reader(new MemoryStream(bytes));
+            Reader reader = new R(new MemoryStream(bytes));
            assert(reader.peek() == 'd');
            assert(reader.peek(1) == 'a');
            assert(reader.peek(2) == 't');
@@ -527,4 +693,10 @@ final class Reader
        utf_test!wchar(to!(wchar[])(data), endian == Endian.bigEndian ? BOM.UTF16BE : BOM.UTF16LE);
        utf_test(data, endian == Endian.bigEndian ? BOM.UTF32BE : BOM.UTF32LE);
    }
+unittest
+{
+    testEndian!Reader();
+    testPeekPrefixForward!Reader();
+    testUTF!Reader();
+}
 }


@@ -65,7 +65,7 @@ final class Representer
     * disabled to use custom representer
     * functions for default types.
     */
-    this(in bool useDefaultRepresenters = true)
+    this(bool useDefaultRepresenters = true)
    {
        if(!useDefaultRepresenters){return;}
        addRepresenter!YAMLNull(&representNull);
@@ -87,13 +87,13 @@ final class Representer
    }
    ///Set default _style for scalars. Invalid means the _style is chosen automatically.
-    @property void defaultScalarStyle(in ScalarStyle style)
+    @property void defaultScalarStyle(ScalarStyle style)
    {
        defaultScalarStyle_ = style;
    }
    ///Set default _style for collections. Invalid means the _style is chosen automatically.
-    @property void defaultCollectionStyle(in CollectionStyle style)
+    @property void defaultCollectionStyle(CollectionStyle style)
    {
        defaultCollectionStyle_ = style;
    }
@@ -237,7 +237,7 @@ final class Representer
     * }
     * --------------------
     */
-    Node representScalar(in string tag, string scalar,
+    Node representScalar(string tag, string scalar,
                         ScalarStyle style = ScalarStyle.Invalid)
    {
        if(style == ScalarStyle.Invalid){style = defaultScalarStyle_;}
@@ -276,7 +276,7 @@ final class Representer
     * }
     * --------------------
     */
-    Node representSequence(in string tag, Node[] sequence,
+    Node representSequence(string tag, Node[] sequence,
                           CollectionStyle style = CollectionStyle.Invalid)
    {
        Node[] value;
@@ -335,7 +335,7 @@ final class Representer
     * }
     * --------------------
     */
-    Node representMapping(in string tag, Node.Pair[] pairs,
+    Node representMapping(string tag, Node.Pair[] pairs,
                          CollectionStyle style = CollectionStyle.Invalid)
    {
        Node.Pair[] value;


@@ -23,7 +23,7 @@ import dyaml.exception;
import dyaml.node;
import dyaml.resolver;
import dyaml.tag;
-import dyaml.tagdirectives;
+import dyaml.tagdirective;
import dyaml.token;
@@ -46,7 +46,7 @@ struct Serializer
        string YAMLVersion_;
        ///Tag directives to emit.
-        TagDirectives tagDirectives_;
+        TagDirective[] tagDirectives_;
        //TODO Use something with more deterministic memory usage.
        ///Nodes with assigned anchors.
@@ -70,7 +70,7 @@ struct Serializer
         */
        this(ref Emitter emitter, Resolver resolver, Encoding encoding,
             in bool explicitStart, in bool explicitEnd, string YAMLVersion,
-             TagDirectives tagDirectives)
+             TagDirective[] tagDirectives)
        {
            emitter_ = &emitter;
            resolver_ = resolver;


@@ -1,131 +0,0 @@
// Copyright Ferdinand Majerech 2011.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
///Shared object.
module dyaml.sharedobject;
/**
* Mixin for shared objects (need a better name).
*
* This works as an index to a static array of type T. Any new object created is
* checked for presence in the array to prevent duplication.
*
* This is useful for e.g. token/event data that rarely needs to be
* stored (e.g. tag directives) to prevent inflation of these structs,
* and when there are many instances of a data type that are mostly
* duplicates (e.g. tags).
*
* This is not the most elegant way to store the extra data and might change in future.
*/
template SharedObject(T, MixedIn)
{
private:
///This class stores the data that is shared between the objects.
class SharedData
{
private:
/**
* Reference count.
*
* When this reaches zero, objects_ are cleared. This is not
* the number of shared objects, but rather of objects using this kind
* of shared object.
*/
int referenceCount_ = 0;
///All known objects of type T are in this array.
T[] objects_;
public:
///Increment the reference count.
void addReference()
{
assert(referenceCount_ >= 0);
++referenceCount_;
}
///Decrement the reference count and clear the constructed objects if zero.
void removeReference()
{
--referenceCount_;
assert(referenceCount_ >= 0);
if(referenceCount_ == 0)
{
clear(objects_);
objects_ = [];
}
}
///Add an object and return its index.
uint add(ref T object)
{
foreach(index, ref known; objects_) if(object == known)
{
return cast(uint)index;
}
objects_ ~= object;
return cast(uint)objects_.length - 1;
}
///Get the object at specified object.
@property T get(in uint index)
{
return objects_[index];
}
}
///Index of the object in data_.
uint index_ = uint.max;
///Stores the actual objects.
static __gshared SharedData data_;
static this()
{
data_ = new SharedData;
}
public:
///Increment the reference count.
static void addReference()
{
synchronized(data_){data_.addReference();}
}
///Decrement the reference count and clear the constructed objects if zero.
static void removeReference()
{
synchronized(data_){data_.removeReference();}
}
///Get the object.
@property T get() const
in{assert(!isNull());}
body
{
T result;
synchronized(data_){result = data_.get(index_);}
return result;
}
///Test for equality with another object.
bool opEquals(const ref MixedIn object) const
{
return object.index_ == index_;
}
///Is this object null (invalid)?
@property bool isNull() const {return index_ == uint.max;}
private:
///Add a new object, checking if identical object already exists.
void add(ref T object)
{
synchronized(data_){index_ = data_.add(object);}
}
}


@@ -7,66 +7,7 @@
///YAML tag.
module dyaml.tag;
-import core.stdc.string;
-
-///YAML tag (data type) struct. Encapsulates a tag to save memory and speed-up comparison.
-struct Tag
-{
-    private:
-        ///Zero terminated tag string.
-        immutable(char)* tag_ = null;
-    public:
-        @disable int opCmp(ref Tag);
-        ///Construct a tag from a string representation.
-        this(in string tag)
-        {
-            if(tag is null || tag == "")
-            {
-                tag_ = null;
-                return;
-            }
-            tag_ = (tag ~ '\0').ptr;
-        }
-        ///Get the tag string.
-        @property string get() const
-        in{assert(!isNull());}
-        body
-        {
-            return cast(string)tag_[0 .. strlen(tag_)];
-        }
-        ///Test for equality with another tag.
-        bool opEquals(const ref Tag tag) const
-        {
-            return isNull ? tag.isNull :
-                   tag.isNull ? false : (0 == strcmp(tag_, tag.tag_));
-        }
-        ///Compute a hash.
-        hash_t toHash() const
-        in{assert(!isNull);}
-        body
-        {
-            static type = typeid(string);
-            auto str = get();
-            return type.getHash(&str);
-        }
-        ///Compare with another tag.
-        int opCmp(const ref Tag tag) const
-        in{assert(!isNull && !tag.isNull);}
-        body
-        {
-            return strcmp(tag_, tag.tag_);
-        }
-        ///Is this tag null (invalid)?
-        @property bool isNull() const {return tag_ is null;}
-}
+import dyaml.zerostring;
+
+///YAML tag (data type) struct. Encapsulates a tag to save memory and speed up comparison.
+alias ZeroString!"Tag" Tag;

dyaml/tagdirective.d (new file, 15 lines)

@@ -0,0 +1,15 @@
// Copyright Ferdinand Majerech 2011.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
///Tag directives.
module dyaml.tagdirective;
///Single tag directive. handle is the shortcut, prefix is the prefix that replaces it.
struct TagDirective
{
string handle;
string prefix;
}
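Note (not part of the commit): a tag directive is now a plain two-field struct, so code that previously held the shared TagDirectives object can hold an ordinary array instead. A small illustrative sketch; the handle/prefix values below are just the standard YAML defaults used as sample data.

import dyaml.tagdirective;

unittest
{
    //An array of directives replaces the old reference-counted TagDirectives object.
    TagDirective[] directives = [TagDirective("!",  "!"),
                                 TagDirective("!!", "tag:yaml.org,2002:")];
    assert(directives[0].handle == "!");
    assert(directives[1].prefix == "tag:yaml.org,2002:");
}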


@@ -1,28 +0,0 @@
// Copyright Ferdinand Majerech 2011.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
///Tag directives.
module dyaml.tagdirectives;
import std.typecons;
import dyaml.sharedobject;
///Single tag directive. handle is the shortcut, prefix is the prefix that replaces it.
alias Tuple!(string, "handle", string, "prefix") tagDirective;
///Tag directives stored in Event.
struct TagDirectives
{
public:
mixin SharedObject!(tagDirective[], TagDirectives);
///Construct a tags object from an array of tag directives.
this(tagDirective[] tagDirectives)
{
add(tagDirectives);
}
}

dyaml/zerostring.d (new file, 73 lines)

@@ -0,0 +1,73 @@
// Copyright Ferdinand Majerech 2011.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
///Zero terminated string.
module dyaml.zerostring;
import core.stdc.string;
/**
* Zero terminated string used to decrease data structure size.
*
* TypeName is used to differentiate types (better than simple alias).
*/
struct ZeroString(string TypeName)
{
private:
///Zero terminated string.
immutable(char)* str_ = null;
public:
@disable int opCmp(ref ZeroString);
///Construct a string.
this(in string str)
{
if(str is null || str == "")
{
str_ = null;
return;
}
str_ = (str ~ '\0').ptr;
}
///Get the string.
@property string get() const
in{assert(!isNull());}
body
{
return cast(string)str_[0 .. strlen(str_)];
}
///Test for equality with another string.
bool opEquals(const ref ZeroString str) const
{
return isNull ? str.isNull :
str.isNull ? false : (0 == strcmp(str_, str.str_));
}
///Compute a hash.
hash_t toHash() const
in{assert(!isNull);}
body
{
static type = typeid(string);
auto str = get();
return type.getHash(&str);
}
///Compare with another string.
int opCmp(const ref ZeroString str) const
in{assert(!isNull && !str.isNull);}
body
{
return strcmp(str_, str.str_);
}
///Is this string null (invalid)?
@property bool isNull() const {return str_ is null;}
}
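Note (purely illustrative, the "Example" type name is made up for this sketch): ZeroString stores only an immutable(char)* internally, so instantiations such as Tag and the zero terminated Anchor stay pointer-sized while still comparing by string content.

import dyaml.zerostring;

//Hypothetical instantiation, analogous to the alias ZeroString!"Tag" Tag above.
alias ZeroString!"Example" ExampleString;

unittest
{
    auto a = ExampleString("foo");
    auto b = ExampleString("foo");
    assert(!a.isNull && a.get == "foo");
    assert(a == b);        //Compares the underlying zero terminated strings.

    ExampleString empty;
    assert(empty.isNull);  //Default value is null until assigned a string.
}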


@@ -42,8 +42,8 @@ void testParser(bool verbose, string dataFilename, string canonicalFilename)
 */
void testLoader(bool verbose, string dataFilename, string canonicalFilename)
{
-    auto data = Loader(dataFilename).loadAll;
-    auto canonical = Loader(canonicalFilename).loadAll;
+    auto data = Loader(dataFilename).loadAll();
+    auto canonical = Loader(canonicalFilename).loadAll();
    assert(data.length == canonical.length, "Unequal node count");
    foreach(n; 0 .. data.length)
@@ -58,7 +58,7 @@ void testLoader(bool verbose, string dataFilename, string canonicalFilename)
                writeln("Canonical value:");
                writeln(canonical[n].debugString);
            }
-            assert(false);
+            assert(false, "testLoader(" ~ dataFilename ~ ", " ~ canonicalFilename ~ ") failed");
        }
    }
}


@@ -83,7 +83,7 @@ void testEmitterOnData(bool verbose, string dataFilename, string canonicalFilena
{
    //Must exist due to Anchor, Tags reference counts.
    auto loader = Loader(dataFilename);
-    auto events = loader.parse();
+    auto events = cast(Event[])loader.parse();
    auto emitStream = new MemoryStream;
    Dumper(emitStream).emit(events);
@@ -98,7 +98,7 @@ void testEmitterOnData(bool verbose, string dataFilename, string canonicalFilena
    loader2.name = "TEST";
    loader2.constructor = new Constructor;
    loader2.resolver = new Resolver;
-    auto newEvents = loader2.parse();
+    auto newEvents = cast(Event[])loader2.parse();
    assert(compareEvents(events, newEvents));
}
@@ -114,7 +114,7 @@ void testEmitterOnCanonical(bool verbose, string canonicalFilename)
{
    //Must exist due to Anchor, Tags reference counts.
    auto loader = Loader(canonicalFilename);
-    auto events = loader.parse();
+    auto events = cast(Event[])loader.parse();
    foreach(canonical; [false, true])
    {
        auto emitStream = new MemoryStream;
@@ -130,7 +130,7 @@ void testEmitterOnCanonical(bool verbose, string canonicalFilename)
        loader2.name = "TEST";
        loader2.constructor = new Constructor;
        loader2.resolver = new Resolver;
-        auto newEvents = loader2.parse();
+        auto newEvents = cast(Event[])loader2.parse();
        assert(compareEvents(events, newEvents));
    }
}
@@ -151,7 +151,7 @@ void testEmitterStyles(bool verbose, string dataFilename, string canonicalFilena
{
    //must exist due to Anchor, Tags reference counts
    auto loader = Loader(canonicalFilename);
-    auto events = loader.parse();
+    auto events = cast(Event[])loader.parse();
    foreach(flowStyle; [CollectionStyle.Block, CollectionStyle.Flow])
    {
        foreach(style; [ScalarStyle.Literal, ScalarStyle.Folded,
@@ -191,7 +191,7 @@ void testEmitterStyles(bool verbose, string dataFilename, string canonicalFilena
            loader2.name = "TEST";
            loader2.constructor = new Constructor;
            loader2.resolver = new Resolver;
-            auto newEvents = loader2.parse();
+            auto newEvents = cast(Event[])loader2.parse();
            assert(compareEvents(events, newEvents));
        }
    }


@@ -72,7 +72,8 @@ void testLoaderErrorFilename(bool verbose, string errorFilename)
        if(verbose){writeln(typeid(e).toString(), "\n", e);}
        return;
    }
-    assert(false, "Expected an exception");
+    assert(false, "testLoaderErrorSingle(" ~ to!string(verbose) ~
+           ", " ~ errorFilename ~ ") Expected an exception");
}
/**