Reader was reimplemented.
After experimenting with loading the whole file at once and with decoding and parsing in a separate thread, a lazy reader turned out to be the fastest and least memory-intensive solution. Characters are now decoded in small batches, which improved parsing speed by ~20%. There is no global state anymore: anchors are now zero-terminated strings and tag directives are a plain TagDirective array. The Event structure was reorganized to prevent a size increase. Minor fixes and improvements.
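For illustration, the lazy batched-decoding idea can be sketched roughly as below. This is a minimal sketch only: BatchedReader, batchSize and decodeBatch are hypothetical names and the input is a plain UTF-8 array, unlike the actual dyaml.reader, which decodes from a stream.

//Sketch of lazy, batched decoding (illustrative only; not the dyaml.reader API).
import std.utf : decode;

struct BatchedReader
{
    private const(char)[] input_;  //Raw UTF-8 input not yet decoded.
    private dchar[] buffer_;       //Characters decoded so far.
    private size_t offset_;        //Read position within buffer_.
    enum batchSize = 256;          //Decode at most this many characters at once.

    this(const(char)[] input) {input_ = input;}

    //Peek at the character index positions ahead, decoding another
    //batch only when the buffer does not yet reach that far.
    dchar peek(size_t index = 0)
    {
        while(buffer_.length <= offset_ + index && input_.length > 0)
        {
            decodeBatch();
        }
        if(buffer_.length <= offset_ + index)
        {
            throw new Exception("Trying to read past the end of input");
        }
        return buffer_[offset_ + index];
    }

    //Move the read position forward by length characters.
    void forward(size_t length = 1) {offset_ += length;}

    private void decodeBatch()
    {
        size_t pos = 0;
        size_t decoded = 0;
        while(pos < input_.length && decoded < batchSize)
        {
            buffer_ ~= decode(input_, pos); //std.utf.decode advances pos.
            ++decoded;
        }
        input_ = input_[pos .. $];
    }
}

Decoding in batches like this avoids holding the whole decoded file in memory at once while also avoiding the per-character decoding overhead of a fully lazy approach.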
This commit is contained in:
parent
f95f0d14c8
commit
2c9d464389
Binary file not shown.
|
@ -112,57 +112,57 @@
|
|||
<dd><p>Set stream name. Used in debugging messages.</p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl">void <a name="resolver"></a><span class="ddoc_psymbol">resolver</span>(Resolver <a name="resolver"></a><span class="ddoc_psymbol">resolver</span>);
|
||||
<dt class="d_decl">@property void <a name="resolver"></a><span class="ddoc_psymbol">resolver</span>(Resolver <a name="resolver"></a><span class="ddoc_psymbol">resolver</span>);
|
||||
</dt>
|
||||
<dd><p>Specify custom Resolver to use.</p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl">void <a name="representer"></a><span class="ddoc_psymbol">representer</span>(Representer <a name="representer"></a><span class="ddoc_psymbol">representer</span>);
|
||||
<dt class="d_decl">@property void <a name="representer"></a><span class="ddoc_psymbol">representer</span>(Representer <a name="representer"></a><span class="ddoc_psymbol">representer</span>);
|
||||
</dt>
|
||||
<dd><p>Specify custom Representer to use.</p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl">void <a name="canonical"></a><span class="ddoc_psymbol">canonical</span>(in bool <a name="canonical"></a><span class="ddoc_psymbol">canonical</span>);
|
||||
<dt class="d_decl">@property void <a name="canonical"></a><span class="ddoc_psymbol">canonical</span>(bool <a name="canonical"></a><span class="ddoc_psymbol">canonical</span>);
|
||||
</dt>
|
||||
<dd><p>Write scalars in canonical form?</p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl">void <a name="indent"></a><span class="ddoc_psymbol">indent</span>(in uint <a name="indent"></a><span class="ddoc_psymbol">indent</span>);
|
||||
<dt class="d_decl">@property void <a name="indent"></a><span class="ddoc_psymbol">indent</span>(uint <a name="indent"></a><span class="ddoc_psymbol">indent</span>);
|
||||
</dt>
|
||||
<dd><p>Set indentation width. 2 by default. Must not be zero.</p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl">void <a name="textWidth"></a><span class="ddoc_psymbol">textWidth</span>(in uint <b>width</b>);
|
||||
<dt class="d_decl">@property void <a name="textWidth"></a><span class="ddoc_psymbol">textWidth</span>(uint <b>width</b>);
|
||||
</dt>
|
||||
<dd><p>Set preferred text width.</p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl">void <a name="lineBreak"></a><span class="ddoc_psymbol">lineBreak</span>(in LineBreak <a name="lineBreak"></a><span class="ddoc_psymbol">lineBreak</span>);
|
||||
<dt class="d_decl">@property void <a name="lineBreak"></a><span class="ddoc_psymbol">lineBreak</span>(LineBreak <a name="lineBreak"></a><span class="ddoc_psymbol">lineBreak</span>);
|
||||
</dt>
|
||||
<dd><p>Set line break to use. Unix by default.</p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl">void <a name="encoding"></a><span class="ddoc_psymbol">encoding</span>(in Encoding <a name="encoding"></a><span class="ddoc_psymbol">encoding</span>);
|
||||
<dt class="d_decl">@property void <a name="encoding"></a><span class="ddoc_psymbol">encoding</span>(Encoding <a name="encoding"></a><span class="ddoc_psymbol">encoding</span>);
|
||||
</dt>
|
||||
<dd><p>Set character encoding to use. UTF-8 by default.</p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl">void <a name="explicitStart"></a><span class="ddoc_psymbol">explicitStart</span>(in bool <b>explicit</b>);
|
||||
<dt class="d_decl">@property void <a name="explicitStart"></a><span class="ddoc_psymbol">explicitStart</span>(bool <b>explicit</b>);
|
||||
</dt>
|
||||
<dd><p>Always explicitly write document start?</p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl">void <a name="explicitEnd"></a><span class="ddoc_psymbol">explicitEnd</span>(in bool <b>explicit</b>);
|
||||
<dt class="d_decl">@property void <a name="explicitEnd"></a><span class="ddoc_psymbol">explicitEnd</span>(bool <b>explicit</b>);
|
||||
</dt>
|
||||
<dd><p>Always explicitly write document end?</p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl">void <a name="YAMLVersion"></a><span class="ddoc_psymbol">YAMLVersion</span>(in string <a name="YAMLVersion"></a><span class="ddoc_psymbol">YAMLVersion</span>);
|
||||
<dt class="d_decl">@property void <a name="YAMLVersion"></a><span class="ddoc_psymbol">YAMLVersion</span>(string <a name="YAMLVersion"></a><span class="ddoc_psymbol">YAMLVersion</span>);
|
||||
</dt>
|
||||
<dd><p>Specify YAML version string. "1.1" by default.</p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl">void <a name="tagDirectives"></a><span class="ddoc_psymbol">tagDirectives</span>(string[string] <b>tags</b>);
|
||||
<dt class="d_decl">@property void <a name="tagDirectives"></a><span class="ddoc_psymbol">tagDirectives</span>(string[string] <b>tags</b>);
|
||||
</dt>
|
||||
<dd><p>Specify tag directives.
|
||||
</p>
|
||||
|
|
|
@ -98,11 +98,11 @@
|
|||
</pre>
|
||||
</div>
|
||||
|
||||
<dl><dt class="d_decl">this(in const(immutable(char)[]) <b>filename</b>);
|
||||
<dl><dt class="d_decl">this(string <b>filename</b>);
|
||||
</dt>
|
||||
<dd><p>Construct a Loader to load YAML from a file.
|
||||
</p>
|
||||
<b>Parameters:</b><div class="pbr"><table class=parms><tr><td valign=top>const(immutable(char)[]) <b>filename</b></td>
|
||||
<b>Parameters:</b><div class="pbr"><table class=parms><tr><td valign=top>string <b>filename</b></td>
|
||||
<td valign=top>Name of the file to load from.</td></tr>
|
||||
</table></div>
|
||||
<b>Throws:</b><div class="pbr">YAMLException if the file could not be opened or read.</div>
|
||||
|
@ -138,6 +138,9 @@
|
|||
<dd><p>Load single YAML document.
|
||||
</p>
|
||||
<p>If none or more than one YAML document is found, this throws a YAMLException.
|
||||
<br>
|
||||
|
||||
This can only be called once; this is enforced by contract.
|
||||
|
||||
</p>
|
||||
<b>Returns:</b><div class="pbr">Root node of the document.
|
||||
|
@ -155,6 +158,9 @@
|
|||
them all at once. Calling <a name="loadAll"></a><span class="ddoc_psymbol">loadAll</span> after iterating over the node or
|
||||
vice versa will not return any documents, as they have all been parsed
|
||||
already.
|
||||
<br>
|
||||
|
||||
This can only be called once; this is enforced by contract.
|
||||
|
||||
</p>
|
||||
<b>Returns:</b><div class="pbr">Array of root nodes of all documents in the file/stream.
|
||||
|
@ -168,6 +174,9 @@
|
|||
<dd><p>Foreach over YAML documents.
|
||||
</p>
|
||||
<p>Parses documents lazily, when they are needed.
|
||||
<br>
|
||||
|
||||
Foreach over a Loader can only be used once; this is enforced by contract.
|
||||
|
||||
</p>
|
||||
<b>Throws:</b><div class="pbr">YAMLException on a parsing error.</div>
|
||||
|
|
|
@ -182,7 +182,7 @@
|
|||
</div>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl">Node <a name="representScalar"></a><span class="ddoc_psymbol">representScalar</span>(in string <b>tag</b>, string <b>scalar</b>, ScalarStyle <b>style</b> = (ScalarStyle).Invalid);
|
||||
<dt class="d_decl">Node <a name="representScalar"></a><span class="ddoc_psymbol">representScalar</span>(string <b>tag</b>, string <b>scalar</b>, ScalarStyle <b>style</b> = (ScalarStyle).Invalid);
|
||||
</dt>
|
||||
<dd><p>Represent a scalar with specified tag.
|
||||
</p>
|
||||
|
@ -217,7 +217,7 @@
|
|||
</p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl">Node <a name="representSequence"></a><span class="ddoc_psymbol">representSequence</span>(in string <b>tag</b>, Node[] <b>sequence</b>, CollectionStyle <b>style</b> = (CollectionStyle).Invalid);
|
||||
<dt class="d_decl">Node <a name="representSequence"></a><span class="ddoc_psymbol">representSequence</span>(string <b>tag</b>, Node[] <b>sequence</b>, CollectionStyle <b>style</b> = (CollectionStyle).Invalid);
|
||||
</dt>
|
||||
<dd><p>Represent a sequence with specified tag, representing children first.
|
||||
</p>
|
||||
|
@ -256,7 +256,7 @@
|
|||
</p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl">Node <a name="representMapping"></a><span class="ddoc_psymbol">representMapping</span>(in string <b>tag</b>, Pair[] <b>pairs</b>, CollectionStyle <b>style</b> = (CollectionStyle).Invalid);
|
||||
<dt class="d_decl">Node <a name="representMapping"></a><span class="ddoc_psymbol">representMapping</span>(string <b>tag</b>, Pair[] <b>pairs</b>, CollectionStyle <b>style</b> = (CollectionStyle).Invalid);
|
||||
</dt>
|
||||
<dd><p>Represent a mapping with specified tag, representing children first.
|
||||
</p>
|
||||
|
|
|
@ -138,7 +138,7 @@ struct appears in Phobos.</p>
|
|||
</div>
|
||||
<div class="footer">
|
||||
© Copyright 2011, Ferdinand Majerech. Based on PyYAML http://www.pyyaml.org by Kirill Simonov.
|
||||
Last updated on Oct 30, 2011.
|
||||
Last updated on Nov 15, 2011.
|
||||
Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 1.0.7.
|
||||
</div>
|
||||
</body>
|
||||
|
|
|
@ -104,7 +104,7 @@
|
|||
</div>
|
||||
<div class="footer">
|
||||
© Copyright 2011, Ferdinand Majerech. Based on PyYAML http://www.pyyaml.org by Kirill Simonov.
|
||||
Last updated on Oct 30, 2011.
|
||||
Last updated on Nov 15, 2011.
|
||||
Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 1.0.7.
|
||||
</div>
|
||||
</body>
|
||||
|
|
|
@ -87,7 +87,7 @@
|
|||
</div>
|
||||
<div class="footer">
|
||||
© Copyright 2011, Ferdinand Majerech. Based on PyYAML http://www.pyyaml.org by Kirill Simonov.
|
||||
Last updated on Oct 30, 2011.
|
||||
Last updated on Nov 15, 2011.
|
||||
Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 1.0.7.
|
||||
</div>
|
||||
</body>
|
||||
|
|
|
@ -368,7 +368,7 @@ directory of the D:YAML package.</p>
|
|||
</div>
|
||||
<div class="footer">
|
||||
© Copyright 2011, Ferdinand Majerech. Based on PyYAML http://www.pyyaml.org by Kirill Simonov.
|
||||
Last updated on Oct 30, 2011.
|
||||
Last updated on Nov 15, 2011.
|
||||
Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 1.0.7.
|
||||
</div>
|
||||
</body>
|
||||
|
|
|
@ -237,7 +237,7 @@ example in the <tt class="docutils literal"><span class="pre">example/getting_st
|
|||
</div>
|
||||
<div class="footer">
|
||||
© Copyright 2011, Ferdinand Majerech. Based on PyYAML http://www.pyyaml.org by Kirill Simonov.
|
||||
Last updated on Oct 30, 2011.
|
||||
Last updated on Nov 15, 2011.
|
||||
Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 1.0.7.
|
||||
</div>
|
||||
</body>
|
||||
|
|
|
@ -330,7 +330,7 @@ Some of these might change in the future (especially !!map and !!set).</p>
|
|||
</div>
|
||||
<div class="footer">
|
||||
© Copyright 2011, Ferdinand Majerech. Based on PyYAML http://www.pyyaml.org by Kirill Simonov.
|
||||
Last updated on Oct 30, 2011.
|
||||
Last updated on Nov 15, 2011.
|
||||
Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 1.0.7.
|
||||
</div>
|
||||
</body>
|
||||
|
|
|
@ -7,24 +7,7 @@
|
|||
///YAML anchor.
|
||||
module dyaml.anchor;
|
||||
|
||||
import dyaml.sharedobject;
|
||||
|
||||
import dyaml.zerostring;
|
||||
|
||||
///YAML anchor (reference) struct. Encapsulates an anchor to save memory.
|
||||
struct Anchor
|
||||
{
|
||||
public:
|
||||
mixin SharedObject!(string, Anchor);
|
||||
|
||||
///Construct an anchor from a string representation.
|
||||
this(string anchor)
|
||||
{
|
||||
if(anchor is null || anchor == "")
|
||||
{
|
||||
index_ = uint.max;
|
||||
return;
|
||||
}
|
||||
|
||||
add(anchor);
|
||||
}
|
||||
}
|
||||
alias ZeroString!"Anchor" Anchor;
|
||||
|
|
|
@ -25,7 +25,7 @@ import dyaml.node;
|
|||
import dyaml.representer;
|
||||
import dyaml.resolver;
|
||||
import dyaml.serializer;
|
||||
import dyaml.tagdirectives;
|
||||
import dyaml.tagdirective;
|
||||
|
||||
|
||||
/**
|
||||
|
@ -146,7 +146,7 @@ struct Dumper
|
|||
///YAML version string.
|
||||
string YAMLVersion_ = "1.1";
|
||||
///Tag directives to use.
|
||||
TagDirectives tags_ = TagDirectives();
|
||||
TagDirective[] tags_ = null;
|
||||
///Always write document start?
|
||||
bool explicitStart_ = false;
|
||||
///Always write document end?
|
||||
|
@ -167,7 +167,7 @@ struct Dumper
|
|||
*
|
||||
* Throws: YAMLException if the file can not be dumped to (e.g. cannot be opened).
|
||||
*/
|
||||
this(in string filename)
|
||||
this(string filename)
|
||||
{
|
||||
name_ = filename;
|
||||
try{this(new File(filename, FileMode.OutNew));}
|
||||
|
@ -184,20 +184,16 @@ struct Dumper
|
|||
resolver_ = defaultResolver_;
|
||||
representer_ = defaultRepresenter_;
|
||||
stream_ = stream;
|
||||
Anchor.addReference();
|
||||
TagDirectives.addReference();
|
||||
}
|
||||
|
||||
///Destroy the Dumper.
|
||||
~this()
|
||||
{
|
||||
Anchor.removeReference();
|
||||
TagDirectives.removeReference();
|
||||
YAMLVersion_ = null;
|
||||
}
|
||||
|
||||
///Set stream _name. Used in debugging messages.
|
||||
@property void name(in string name)
|
||||
@property void name(string name)
|
||||
{
|
||||
name_ = name;
|
||||
}
|
||||
|
@ -217,13 +213,13 @@ struct Dumper
|
|||
}
|
||||
|
||||
///Write scalars in _canonical form?
|
||||
@property void canonical(in bool canonical)
|
||||
@property void canonical(bool canonical)
|
||||
{
|
||||
canonical_ = canonical;
|
||||
}
|
||||
|
||||
///Set indentation width. 2 by default. Must not be zero.
|
||||
@property void indent(in uint indent)
|
||||
@property void indent(uint indent)
|
||||
in
|
||||
{
|
||||
assert(indent != 0, "Can't use zero YAML indent width");
|
||||
|
@ -234,37 +230,37 @@ struct Dumper
|
|||
}
|
||||
|
||||
///Set preferred text _width.
|
||||
@property void textWidth(in uint width)
|
||||
@property void textWidth(uint width)
|
||||
{
|
||||
textWidth_ = width;
|
||||
}
|
||||
|
||||
///Set line break to use. Unix by default.
|
||||
@property void lineBreak(in LineBreak lineBreak)
|
||||
@property void lineBreak(LineBreak lineBreak)
|
||||
{
|
||||
lineBreak_ = lineBreak;
|
||||
}
|
||||
|
||||
///Set character _encoding to use. UTF-8 by default.
|
||||
@property void encoding(in Encoding encoding)
|
||||
@property void encoding(Encoding encoding)
|
||||
{
|
||||
encoding_ = encoding;
|
||||
}
|
||||
|
||||
///Always explicitly write document start?
|
||||
@property void explicitStart(in bool explicit)
|
||||
@property void explicitStart(bool explicit)
|
||||
{
|
||||
explicitStart_ = explicit;
|
||||
}
|
||||
|
||||
///Always explicitly write document end?
|
||||
@property void explicitEnd(in bool explicit)
|
||||
@property void explicitEnd(bool explicit)
|
||||
{
|
||||
explicitEnd_ = explicit;
|
||||
}
|
||||
|
||||
///Specify YAML version string. "1.1" by default.
|
||||
@property void YAMLVersion(in string YAMLVersion)
|
||||
@property void YAMLVersion(string YAMLVersion)
|
||||
{
|
||||
YAMLVersion_ = YAMLVersion;
|
||||
}
|
||||
|
@ -301,16 +297,16 @@ struct Dumper
|
|||
*/
|
||||
@property void tagDirectives(string[string] tags)
|
||||
{
|
||||
tagDirective[] t;
|
||||
TagDirective[] t;
|
||||
foreach(handle, prefix; tags)
|
||||
{
|
||||
assert(handle.length >= 1 && handle[0] == '!' && handle[$ - 1] == '!',
|
||||
"A tag handle is empty or does not start and end with a "
|
||||
"'!' character : " ~ handle);
|
||||
assert(prefix.length >= 1, "A tag prefix is empty");
|
||||
t ~= tagDirective(handle, prefix);
|
||||
t ~= TagDirective(handle, prefix);
|
||||
}
|
||||
tags_ = TagDirectives(t);
|
||||
tags_ = t;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -352,7 +348,7 @@ struct Dumper
|
|||
*
|
||||
* Throws: YAMLException if unable to emit.
|
||||
*/
|
||||
void emit(in Event[] events)
|
||||
void emit(Event[] events)
|
||||
{
|
||||
try
|
||||
{
|
||||
|
|
|
@ -71,11 +71,11 @@ private mixin FastCharSearch!"\n\u0085\u2028\u2029"d newlineSearch_;
|
|||
struct Emitter
|
||||
{
|
||||
private:
|
||||
alias dyaml.tagdirectives.tagDirective tagDirective;
|
||||
alias dyaml.tagdirective.TagDirective TagDirective;
|
||||
|
||||
///Default tag handle shortcuts and replacements.
|
||||
static tagDirective[] defaultTagDirectives_ =
|
||||
[tagDirective("!", "!"), tagDirective("!!", "tag:yaml.org,2002:")];
|
||||
static TagDirective[] defaultTagDirectives_ =
|
||||
[TagDirective("!", "!"), TagDirective("!!", "tag:yaml.org,2002:")];
|
||||
|
||||
///Stream to write to.
|
||||
Stream stream_;
|
||||
|
@ -135,7 +135,7 @@ struct Emitter
|
|||
LineBreak bestLineBreak_;
|
||||
|
||||
///Tag directive handle - prefix pairs.
|
||||
tagDirective[] tagDirectives_;
|
||||
TagDirective[] tagDirectives_;
|
||||
|
||||
///Anchor/alias to process.
|
||||
string preparedAnchor_ = null;
|
||||
|
@ -193,7 +193,7 @@ struct Emitter
|
|||
}
|
||||
|
||||
///Emit an event. Throws EmitterException on error.
|
||||
void emit(immutable Event event)
|
||||
void emit(Event event)
|
||||
{
|
||||
events_.push(event);
|
||||
while(!needMoreEvents())
|
||||
|
@ -254,7 +254,7 @@ struct Emitter
|
|||
{
|
||||
if(events_.length == 0){return true;}
|
||||
|
||||
immutable event = events_.peek();
|
||||
immutable event = cast(immutable Event)events_.peek();
|
||||
if(event.id == EventID.DocumentStart){return needEvents(1);}
|
||||
if(event.id == EventID.SequenceStart){return needEvents(2);}
|
||||
if(event.id == EventID.MappingStart) {return needEvents(3);}
|
||||
|
@ -274,7 +274,7 @@ struct Emitter
|
|||
events_.next();
|
||||
while(!events_.iterationOver())
|
||||
{
|
||||
immutable event = events_.next();
|
||||
immutable event = cast(immutable Event)events_.next();
|
||||
static starts = [EventID.DocumentStart, EventID.SequenceStart, EventID.MappingStart];
|
||||
static ends = [EventID.DocumentEnd, EventID.SequenceEnd, EventID.MappingEnd];
|
||||
if(starts.canFind(event.id)) {++level;}
|
||||
|
@ -347,8 +347,8 @@ struct Emitter
|
|||
if(event_.id == EventID.DocumentStart)
|
||||
{
|
||||
const YAMLVersion = event_.value;
|
||||
const tagDirectives = event_.tagDirectives;
|
||||
if(openEnded_ && (YAMLVersion !is null || !tagDirectives.isNull()))
|
||||
auto tagDirectives = event_.tagDirectives;
|
||||
if(openEnded_ && (YAMLVersion !is null || tagDirectives !is null))
|
||||
{
|
||||
writeIndicator("...", true);
|
||||
writeIndent();
|
||||
|
@ -359,10 +359,10 @@ struct Emitter
|
|||
writeVersionDirective(prepareVersion(YAMLVersion));
|
||||
}
|
||||
|
||||
if(!tagDirectives.isNull())
|
||||
if(tagDirectives !is null)
|
||||
{
|
||||
tagDirectives_ = tagDirectives.get;
|
||||
sort!"icmp(a[0], b[0]) < 0"(tagDirectives_);
|
||||
tagDirectives_ = tagDirectives;
|
||||
sort!"icmp(a.handle, b.handle) < 0"(tagDirectives_);
|
||||
|
||||
foreach(ref pair; tagDirectives_)
|
||||
{
|
||||
|
@ -371,7 +371,7 @@ struct Emitter
|
|||
}
|
||||
}
|
||||
|
||||
bool eq(ref tagDirective a, ref tagDirective b){return a.handle == b.handle;}
|
||||
bool eq(ref TagDirective a, ref TagDirective b){return a.handle == b.handle;}
|
||||
//Add any default tag directives that have not been overriden.
|
||||
foreach(ref def; defaultTagDirectives_)
|
||||
{
|
||||
|
@ -382,7 +382,7 @@ struct Emitter
|
|||
}
|
||||
|
||||
const implicit = first && !event_.explicitDocument && !canonical_ &&
|
||||
YAMLVersion is null && tagDirectives.isNull() &&
|
||||
YAMLVersion is null && tagDirectives is null &&
|
||||
!checkEmptyDocument();
|
||||
if(!implicit)
|
||||
{
|
||||
|
@ -684,7 +684,7 @@ struct Emitter
|
|||
return false;
|
||||
}
|
||||
|
||||
immutable event = events_.peek();
|
||||
immutable event = cast(immutable Event)events_.peek();
|
||||
const emptyScalar = event.id == EventID.Scalar && event.anchor.isNull() &&
|
||||
event.tag.isNull() && event.implicit && event.value == "";
|
||||
return emptyScalar;
|
||||
|
@ -933,14 +933,14 @@ struct Emitter
|
|||
string suffix = tagString;
|
||||
|
||||
//Sort lexicographically by prefix.
|
||||
sort!"icmp(a[1], b[1]) < 0"(tagDirectives_);
|
||||
sort!"icmp(a.prefix, b.prefix) < 0"(tagDirectives_);
|
||||
foreach(ref pair; tagDirectives_)
|
||||
{
|
||||
auto prefix = pair[1];
|
||||
auto prefix = pair.prefix;
|
||||
if(tagString.startsWith(prefix) &&
|
||||
(prefix != "!" || prefix.length < tagString.length))
|
||||
{
|
||||
handle = pair[0];
|
||||
handle = pair.handle;
|
||||
suffix = tagString[prefix.length .. $];
|
||||
}
|
||||
}
|
||||
|
|
103 dyaml/event.d
|
@ -19,7 +19,7 @@ import dyaml.encoding;
|
|||
import dyaml.exception;
|
||||
import dyaml.reader;
|
||||
import dyaml.tag;
|
||||
import dyaml.tagdirectives;
|
||||
import dyaml.tagdirective;
|
||||
import dyaml.style;
|
||||
|
||||
|
||||
|
@ -55,30 +55,40 @@ struct Event
|
|||
Mark startMark;
|
||||
///End position of the event in file/stream.
|
||||
Mark endMark;
|
||||
///Anchor of the event, if any.
|
||||
Anchor anchor;
|
||||
///Tag of the event, if any.
|
||||
Tag tag;
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
///Anchor of the event, if any.
|
||||
Anchor anchor;
|
||||
///Tag of the event, if any.
|
||||
Tag tag;
|
||||
}
|
||||
///Tag directives, if this is a DocumentStart.
|
||||
//TagDirectives tagDirectives;
|
||||
TagDirective[] tagDirectives;
|
||||
}
|
||||
///Event type.
|
||||
EventID id = EventID.Invalid;
|
||||
///Style of scalar event, if this is a scalar event.
|
||||
ScalarStyle scalarStyle;
|
||||
///Should the tag be implicitly resolved?
|
||||
bool implicit;
|
||||
ScalarStyle scalarStyle = ScalarStyle.Invalid;
|
||||
union
|
||||
{
|
||||
///Should the tag be implicitly resolved?
|
||||
bool implicit;
|
||||
/**
|
||||
* Is this document event explicit?
|
||||
*
|
||||
* Used if this is a DocumentStart or DocumentEnd.
|
||||
*/
|
||||
bool explicitDocument;
|
||||
}
|
||||
///TODO figure this out - Unknown, used by PyYAML with Scalar events.
|
||||
bool implicit_2;
|
||||
/**
|
||||
* Is this document event explicit?
|
||||
*
|
||||
* Used if this is a DocumentStart or DocumentEnd.
|
||||
*/
|
||||
alias implicit explicitDocument;
|
||||
///Tag directives, if this is a DocumentStart.
|
||||
TagDirectives tagDirectives;
|
||||
///Encoding of the stream, if this is a StreamStart.
|
||||
Encoding encoding;
|
||||
///Collection style, if this is a SequenceStart or MappingStart.
|
||||
CollectionStyle collectionStyle;
|
||||
CollectionStyle collectionStyle = CollectionStyle.Invalid;
|
||||
|
||||
///Is this a null (uninitialized) event?
|
||||
@property bool isNull() const {return id == EventID.Invalid;}
|
||||
|
@ -96,7 +106,12 @@ struct Event
|
|||
*/
|
||||
Event event(EventID id)(in Mark start, in Mark end, in Anchor anchor = Anchor()) pure
|
||||
{
|
||||
return Event(null, start, end, anchor, Tag(), id);
|
||||
Event result;
|
||||
result.startMark = start;
|
||||
result.endMark = end;
|
||||
result.anchor = anchor;
|
||||
result.id = id;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -114,8 +129,15 @@ Event collectionStartEvent(EventID id)(in Mark start, in Mark end, in Anchor anc
|
|||
{
|
||||
static assert(id == EventID.SequenceStart || id == EventID.SequenceEnd ||
|
||||
id == EventID.MappingStart || id == EventID.MappingEnd);
|
||||
return Event(null, start, end, anchor, tag, id, ScalarStyle.Invalid, implicit,
|
||||
false, TagDirectives(), Encoding.UTF_8, style);
|
||||
Event result;
|
||||
result.startMark = start;
|
||||
result.endMark = end;
|
||||
result.anchor = anchor;
|
||||
result.tag = tag;
|
||||
result.id = id;
|
||||
result.implicit = implicit;
|
||||
result.collectionStyle = style;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -127,8 +149,12 @@ Event collectionStartEvent(EventID id)(in Mark start, in Mark end, in Anchor anc
|
|||
*/
|
||||
Event streamStartEvent(in Mark start, in Mark end, in Encoding encoding) pure
|
||||
{
|
||||
return Event(null, start, end, Anchor(), Tag(), EventID.StreamStart,
|
||||
ScalarStyle.Invalid, false, false, TagDirectives(), encoding);
|
||||
Event result;
|
||||
result.startMark = start;
|
||||
result.endMark = end;
|
||||
result.id = EventID.StreamStart;
|
||||
result.encoding = encoding;
|
||||
return result;
|
||||
}
|
||||
|
||||
///Aliases for simple events.
|
||||
|
@ -151,10 +177,16 @@ alias collectionStartEvent!(EventID.MappingStart) mappingStartEvent;
|
|||
* tagDirectives = Tag directives of the document.
|
||||
*/
|
||||
Event documentStartEvent(in Mark start, in Mark end, bool explicit, string YAMLVersion,
|
||||
in TagDirectives tagDirectives) pure
|
||||
TagDirective[] tagDirectives) pure
|
||||
{
|
||||
return Event(YAMLVersion, start, end, Anchor(), Tag(), EventID.DocumentStart,
|
||||
ScalarStyle.Invalid, explicit, false, tagDirectives);
|
||||
Event result;
|
||||
result.value = YAMLVersion;
|
||||
result.startMark = start;
|
||||
result.endMark = end;
|
||||
result.id = EventID.DocumentStart;
|
||||
result.explicitDocument = explicit;
|
||||
result.tagDirectives = tagDirectives;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -166,8 +198,12 @@ Event documentStartEvent(in Mark start, in Mark end, bool explicit, string YAMLV
|
|||
*/
|
||||
Event documentEndEvent(in Mark start, in Mark end, bool explicit) pure
|
||||
{
|
||||
return Event(null, start, end, Anchor(), Tag(), EventID.DocumentEnd,
|
||||
ScalarStyle.Invalid, explicit);
|
||||
Event result;
|
||||
result.startMark = start;
|
||||
result.endMark = end;
|
||||
result.id = EventID.DocumentEnd;
|
||||
result.explicitDocument = explicit;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -185,6 +221,15 @@ Event scalarEvent(in Mark start, in Mark end, in Anchor anchor, in Tag tag,
|
|||
in Tuple!(bool, bool) implicit, in string value,
|
||||
in ScalarStyle style = ScalarStyle.Invalid) pure
|
||||
{
|
||||
return Event(value, start, end, anchor, tag, EventID.Scalar, style, implicit[0],
|
||||
implicit[1]);
|
||||
Event result;
|
||||
result.value = value;
|
||||
result.startMark = start;
|
||||
result.endMark = end;
|
||||
result.anchor = anchor;
|
||||
result.tag = tag;
|
||||
result.id = EventID.Scalar;
|
||||
result.scalarStyle = style;
|
||||
result.implicit = implicit[0];
|
||||
result.implicit_2 = implicit[1];
|
||||
return result;
|
||||
}
|
||||
|
|
|
@ -13,7 +13,6 @@ module dyaml.loader;
|
|||
import std.exception;
|
||||
import std.stream;
|
||||
|
||||
import dyaml.anchor;
|
||||
import dyaml.composer;
|
||||
import dyaml.constructor;
|
||||
import dyaml.event;
|
||||
|
@ -23,7 +22,6 @@ import dyaml.parser;
|
|||
import dyaml.reader;
|
||||
import dyaml.resolver;
|
||||
import dyaml.scanner;
|
||||
import dyaml.tagdirectives;
|
||||
import dyaml.token;
|
||||
|
||||
|
||||
|
@ -114,6 +112,8 @@ struct Loader
|
|||
Constructor constructor_;
|
||||
///Name of the input file or stream, used in error messages.
|
||||
string name_ = "<unknown>";
|
||||
///Are we done loading?
|
||||
bool done_ = false;
|
||||
|
||||
public:
|
||||
@disable this();
|
||||
|
@ -127,7 +127,7 @@ struct Loader
|
|||
*
|
||||
* Throws: YAMLException if the file could not be opened or read.
|
||||
*/
|
||||
this(in string filename)
|
||||
this(string filename)
|
||||
{
|
||||
name_ = filename;
|
||||
try{this(new File(filename));}
|
||||
|
@ -154,8 +154,6 @@ struct Loader
|
|||
parser_ = new Parser(scanner_);
|
||||
resolver_ = defaultResolver_;
|
||||
constructor_ = defaultConstructor_;
|
||||
Anchor.addReference();
|
||||
TagDirectives.addReference();
|
||||
}
|
||||
catch(YAMLException e)
|
||||
{
|
||||
|
@ -167,8 +165,6 @@ struct Loader
|
|||
///Destroy the Loader.
|
||||
~this()
|
||||
{
|
||||
Anchor.removeReference();
|
||||
TagDirectives.removeReference();
|
||||
clear(reader_);
|
||||
clear(scanner_);
|
||||
clear(parser_);
|
||||
|
@ -197,15 +193,23 @@ struct Loader
|
|||
*
|
||||
* If none or more than one YAML document is found, this throws a YAMLException.
|
||||
*
|
||||
* This can only be called once; this is enforced by contract.
|
||||
*
|
||||
* Returns: Root node of the document.
|
||||
*
|
||||
* Throws: YAMLException if there wasn't exactly one document
|
||||
* or on a YAML parsing error.
|
||||
*/
|
||||
Node load()
|
||||
in
|
||||
{
|
||||
assert(!done_, "Loader: Trying to load YAML twice");
|
||||
}
|
||||
body
|
||||
{
|
||||
try
|
||||
{
|
||||
scope(exit){done_ = true;}
|
||||
auto composer = new Composer(parser_, resolver_, constructor_);
|
||||
enforce(composer.checkNode(), new YAMLException("No YAML document to load"));
|
||||
return composer.getSingleNode();
|
||||
|
@ -225,6 +229,8 @@ struct Loader
|
|||
* vice versa will not return any documents, as they have all been parsed
|
||||
* already.
|
||||
*
|
||||
* This can only be called once; this is enforced by contract.
|
||||
*
|
||||
* Returns: Array of root nodes of all documents in the file/stream.
|
||||
*
|
||||
* Throws: YAMLException on a parsing error.
|
||||
|
@ -241,10 +247,18 @@ struct Loader
|
|||
*
|
||||
* Parses documents lazily, when they are needed.
|
||||
*
|
||||
* Foreach over a Loader can only be used once; this is enforced by contract.
|
||||
*
|
||||
* Throws: YAMLException on a parsing error.
|
||||
*/
|
||||
int opApply(int delegate(ref Node) dg)
|
||||
in
|
||||
{
|
||||
assert(!done_, "Loader: Trying to load YAML twice");
|
||||
}
|
||||
body
|
||||
{
|
||||
scope(exit){done_ = true;}
|
||||
try
|
||||
{
|
||||
auto composer = new Composer(parser_, resolver_, constructor_);
|
||||
|
@ -284,11 +298,11 @@ struct Loader
|
|||
}
|
||||
|
||||
//Parse and return all events. Used for debugging.
|
||||
Event[] parse()
|
||||
immutable(Event)[] parse()
|
||||
{
|
||||
try
|
||||
{
|
||||
Event[] result;
|
||||
immutable(Event)[] result;
|
||||
while(parser_.checkEvent()){result ~= parser_.getEvent();}
|
||||
return result;
|
||||
}
|
||||
|
|
|
@ -24,7 +24,7 @@ import dyaml.scanner;
|
|||
import dyaml.style;
|
||||
import dyaml.token;
|
||||
import dyaml.tag;
|
||||
import dyaml.tagdirectives;
|
||||
import dyaml.tagdirective;
|
||||
|
||||
|
||||
package:
|
||||
|
@ -108,10 +108,10 @@ final class Parser
|
|||
{
|
||||
private:
|
||||
///Default tag handle shortcuts and replacements.
|
||||
static tagDirective[] defaultTagDirectives_;
|
||||
static TagDirective[] defaultTagDirectives_;
|
||||
static this()
|
||||
{
|
||||
defaultTagDirectives_ = [tagDirective("!", "!"), tagDirective("!!", "tag:yaml.org,2002:")];
|
||||
defaultTagDirectives_ = [TagDirective("!", "!"), TagDirective("!!", "tag:yaml.org,2002:")];
|
||||
}
|
||||
|
||||
///Scanner providing YAML tokens.
|
||||
|
@ -123,7 +123,7 @@ final class Parser
|
|||
///YAML version string.
|
||||
string YAMLVersion_ = null;
|
||||
///Tag handle shortcuts and replacements.
|
||||
tagDirective[] tagDirectives_;
|
||||
TagDirective[] tagDirectives_;
|
||||
|
||||
///Stack of states.
|
||||
Array!(Event delegate()) states_;
|
||||
|
@ -193,13 +193,13 @@ final class Parser
|
|||
*
|
||||
* Must not be called if there are no events left.
|
||||
*/
|
||||
Event peekEvent()
|
||||
immutable(Event) peekEvent()
|
||||
{
|
||||
if(currentEvent_.isNull && state_ !is null)
|
||||
{
|
||||
currentEvent_ = state_();
|
||||
}
|
||||
if(!currentEvent_.isNull){return currentEvent_;}
|
||||
if(!currentEvent_.isNull){return cast(immutable Event)currentEvent_;}
|
||||
assert(false, "No event left to peek");
|
||||
}
|
||||
|
||||
|
@ -208,7 +208,7 @@ final class Parser
|
|||
*
|
||||
* Must not be called if there are no events left.
|
||||
*/
|
||||
Event getEvent()
|
||||
immutable(Event) getEvent()
|
||||
{
|
||||
//Get the next event and proceed further.
|
||||
if(currentEvent_.isNull && state_ !is null)
|
||||
|
@ -218,7 +218,7 @@ final class Parser
|
|||
|
||||
if(!currentEvent_.isNull)
|
||||
{
|
||||
immutable Event result = currentEvent_;
|
||||
immutable Event result = cast(immutable Event)currentEvent_;
|
||||
currentEvent_.id = EventID.Invalid;
|
||||
return result;
|
||||
}
|
||||
|
@ -273,7 +273,7 @@ final class Parser
|
|||
states_ ~= &parseDocumentEnd;
|
||||
state_ = &parseBlockNode;
|
||||
|
||||
return documentStartEvent(token.startMark, token.endMark, false, null, TagDirectives());
|
||||
return documentStartEvent(token.startMark, token.endMark, false, null, null);
|
||||
}
|
||||
return parseDocumentStart();
|
||||
}
|
||||
|
@ -336,7 +336,7 @@ final class Parser
|
|||
}
|
||||
|
||||
///Process directives at the beginning of a document.
|
||||
TagDirectives processDirectives()
|
||||
TagDirective[] processDirectives()
|
||||
{
|
||||
//Destroy version and tag handles from previous document.
|
||||
YAMLVersion_ = null;
|
||||
|
@ -367,21 +367,21 @@ final class Parser
|
|||
foreach(ref pair; tagDirectives_)
|
||||
{
|
||||
//handle
|
||||
const h = pair[0];
|
||||
const h = pair.handle;
|
||||
enforce(h != handle, new Error("Duplicate tag handle: " ~ handle,
|
||||
token.startMark));
|
||||
}
|
||||
tagDirectives_ ~= tagDirective(handle, parts[2]);
|
||||
tagDirectives_ ~= TagDirective(handle, parts[2]);
|
||||
}
|
||||
}
|
||||
|
||||
TagDirectives value = tagDirectives_.length == 0 ? TagDirectives() : TagDirectives(tagDirectives_);
|
||||
TagDirective[] value = tagDirectives_;
|
||||
|
||||
//Add any default tag handles that haven't been overridden.
|
||||
foreach(ref defaultPair; defaultTagDirectives_)
|
||||
{
|
||||
bool found = false;
|
||||
foreach(ref pair; tagDirectives_) if(defaultPair[0] == pair[0])
|
||||
foreach(ref pair; tagDirectives_) if(defaultPair.handle == pair.handle)
|
||||
{
|
||||
found = true;
|
||||
break;
|
||||
|
@ -540,10 +540,9 @@ final class Parser
|
|||
string replacement = null;
|
||||
foreach(ref pair; tagDirectives_)
|
||||
{
|
||||
//pair[0] is handle, pair[1] replacement.
|
||||
if(pair[0] == handle)
|
||||
if(pair.handle == handle)
|
||||
{
|
||||
replacement = pair[1];
|
||||
replacement = pair.prefix;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -37,7 +37,7 @@ struct Queue(T)
|
|||
///Linked list node containing one element and pointer to the next node.
|
||||
struct Node
|
||||
{
|
||||
T payload_ = T.init;
|
||||
T payload_;
|
||||
Node* next_ = null;
|
||||
}
|
||||
|
||||
|
@ -90,7 +90,7 @@ struct Queue(T)
|
|||
}
|
||||
|
||||
///Push new item to the queue.
|
||||
void push(in T item)
|
||||
void push(T item)
|
||||
{
|
||||
Node* newLast = allocate!Node(item, cast(Node*)null);
|
||||
if(last_ !is null){last_.next_ = newLast;}
|
||||
|
@ -100,7 +100,7 @@ struct Queue(T)
|
|||
}
|
||||
|
||||
///Insert a new item putting it to specified index in the linked list.
|
||||
void insert(in T item, in size_t idx)
|
||||
void insert(T item, in size_t idx)
|
||||
in
|
||||
{
|
||||
assert(idx <= length_);
|
||||
|
@ -155,7 +155,7 @@ struct Queue(T)
|
|||
}
|
||||
|
||||
///Return the next element in the queue.
|
||||
ref const(T) peek() const
|
||||
ref inout(T) peek() inout
|
||||
in
|
||||
{
|
||||
assert(!empty, "Trying to peek at an element in an empty queue");
|
||||
|
|
652 dyaml/reader.d
|
@ -7,7 +7,9 @@
|
|||
module dyaml.reader;
|
||||
|
||||
|
||||
import core.stdc.stdlib;
|
||||
import core.stdc.string;
|
||||
import core.thread;
|
||||
|
||||
import std.algorithm;
|
||||
import std.conv;
|
||||
|
@ -34,47 +36,30 @@ class ReaderException : YAMLException
|
|||
}
|
||||
}
|
||||
|
||||
///Reads data from a stream and converts it to UTF-32 (dchar) data.
|
||||
///Lazily reads and decodes data from stream, only storing as much as needed at any moment.
|
||||
final class Reader
|
||||
{
|
||||
private:
|
||||
///Input stream.
|
||||
//Input stream.
|
||||
EndianStream stream_;
|
||||
///Allocated space for buffer_.
|
||||
dchar[] bufferAllocated_;
|
||||
///Buffer of currently loaded characters.
|
||||
dchar[] buffer_;
|
||||
///Current position within buffer. Only data after this position can be read.
|
||||
//Allocated space for buffer_.
|
||||
dchar[] bufferAllocated_ = null;
|
||||
//Buffer of currently loaded characters.
|
||||
dchar[] buffer_ = null;
|
||||
//Current position within buffer. Only data after this position can be read.
|
||||
uint bufferOffset_ = 0;
|
||||
///Index of the current character in the stream.
|
||||
//Index of the current character in the stream.
|
||||
size_t charIndex_ = 0;
|
||||
///Encoding of the input stream.
|
||||
Encoding encoding_;
|
||||
///Current line in file.
|
||||
//Current line in file.
|
||||
uint line_;
|
||||
///Current column in file.
|
||||
//Current column in file.
|
||||
uint column_;
|
||||
///Number of bytes still available (not read) in the stream.
|
||||
size_t available_;
|
||||
|
||||
///Capacity of raw buffers.
|
||||
static immutable bufferLength8_ = 8;
|
||||
///Capacity of raw buffers.
|
||||
static immutable bufferLength16_ = bufferLength8_ / 2;
|
||||
|
||||
union
|
||||
{
|
||||
///Buffer to hold UTF-8 data before decoding.
|
||||
char[bufferLength8_ + 1] rawBuffer8_;
|
||||
///Buffer to hold UTF-16 data before decoding.
|
||||
wchar[bufferLength16_ + 1] rawBuffer16_;
|
||||
}
|
||||
///Number of elements held in the used raw buffer.
|
||||
uint rawUsed_ = 0;
|
||||
//Decoder reading data from file and decoding it to UTF-32.
|
||||
UTFFastDecoder decoder_;
|
||||
|
||||
public:
|
||||
/**
|
||||
* Construct a Reader.
|
||||
/*
|
||||
* Construct an AbstractReader.
|
||||
*
|
||||
* Params: stream = Input stream. Must be readable and seekable.
|
||||
*
|
||||
|
@ -89,51 +74,14 @@ final class Reader
|
|||
body
|
||||
{
|
||||
stream_ = new EndianStream(stream);
|
||||
available_ = stream_.available;
|
||||
|
||||
//handle files short enough not to have a BOM
|
||||
if(available_ < 2)
|
||||
{
|
||||
encoding_ = Encoding.UTF_8;
|
||||
return;
|
||||
}
|
||||
|
||||
//readBOM will determine and set stream endianness
|
||||
switch(stream_.readBOM(2))
|
||||
{
|
||||
case -1:
|
||||
//readBOM() eats two more bytes in this case so get them back
|
||||
const wchar bytes = stream_.getcw();
|
||||
rawBuffer8_[0] = cast(char)(bytes % 256);
|
||||
rawBuffer8_[1] = cast(char)(bytes / 256);
|
||||
rawUsed_ = 2;
|
||||
goto case 0;
|
||||
case 0: encoding_ = Encoding.UTF_8; break;
|
||||
case 1, 2:
|
||||
//readBOM() eats two more bytes in this case so get them back
|
||||
encoding_ = Encoding.UTF_16;
|
||||
rawBuffer16_[0] = stream_.getcw();
|
||||
rawUsed_ = 1;
|
||||
enforce(available_ % 2 == 0,
|
||||
new ReaderException("Odd byte count in an UTF-16 stream"));
|
||||
break;
|
||||
case 3, 4:
|
||||
enforce(available_ % 4 == 0,
|
||||
new ReaderException("Byte count in an UTF-32 stream not divisible by 4"));
|
||||
encoding_ = Encoding.UTF_32;
|
||||
break;
|
||||
default: assert(false, "Unknown UTF BOM");
|
||||
}
|
||||
available_ = stream_.available;
|
||||
|
||||
auto ptr = cast(dchar*)core.stdc.stdlib.malloc(dchar.sizeof * 256);
|
||||
bufferAllocated_ = ptr[0 .. 256];
|
||||
decoder_ = UTFFastDecoder(stream_);
|
||||
}
|
||||
|
||||
///Destroy the Reader.
|
||||
~this()
|
||||
{
|
||||
core.stdc.stdlib.free(bufferAllocated_.ptr);
|
||||
//Delete the buffer, if allocated.
|
||||
if(bufferAllocated_ is null){return;}
|
||||
free(bufferAllocated_.ptr);
|
||||
buffer_ = bufferAllocated_ = null;
|
||||
}
|
||||
|
||||
|
@ -148,12 +96,13 @@ final class Reader
|
|||
* Throws: ReaderException if trying to read past the end of the stream
|
||||
* or if invalid data is read.
|
||||
*/
|
||||
dchar peek(in size_t index = 0)
|
||||
dchar peek(size_t index = 0)
|
||||
{
|
||||
if(buffer_.length <= bufferOffset_ + index + 1)
|
||||
if(buffer_.length < bufferOffset_ + index + 1)
|
||||
{
|
||||
updateBuffer(index + 1);
|
||||
}
|
||||
|
||||
if(buffer_.length <= bufferOffset_ + index)
|
||||
{
|
||||
throw new ReaderException("Trying to read past the end of the stream");
|
||||
|
@ -172,7 +121,7 @@ final class Reader
|
|||
*
|
||||
* Returns: Characters starting at current position or an empty slice if out of bounds.
|
||||
*/
|
||||
const(dstring) prefix(in size_t length)
|
||||
const(dstring) prefix(size_t length)
|
||||
{
|
||||
return slice(0, length);
|
||||
}
|
||||
|
@ -194,12 +143,12 @@ final class Reader
|
|||
{
|
||||
updateBuffer(end);
|
||||
}
|
||||
|
||||
end += bufferOffset_;
|
||||
start += bufferOffset_;
|
||||
end = min(buffer_.length, end);
|
||||
if(end <= start){return "";}
|
||||
|
||||
return cast(dstring)buffer_[start .. end];
|
||||
return end > start ? cast(dstring)buffer_[start .. end] : "";
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -227,7 +176,7 @@ final class Reader
|
|||
* Throws: ReaderException if trying to read past the end of the stream
|
||||
* or if invalid data is read.
|
||||
*/
|
||||
dstring get(in size_t length)
|
||||
dstring get(size_t length)
|
||||
{
|
||||
auto result = prefix(length).dup;
|
||||
forward(length);
|
||||
|
@ -244,13 +193,13 @@ final class Reader
|
|||
*/
|
||||
void forward(size_t length = 1)
|
||||
{
|
||||
mixin FastCharSearch!"\n\u0085\u2028\u2029"d search;
|
||||
|
||||
if(buffer_.length <= bufferOffset_ + length + 1)
|
||||
{
|
||||
updateBuffer(length + 1);
|
||||
}
|
||||
|
||||
mixin FastCharSearch!"\n\u0085\u2028\u2029"d search;
|
||||
|
||||
while(length > 0)
|
||||
{
|
||||
const c = buffer_[bufferOffset_];
|
||||
|
@ -268,19 +217,19 @@ final class Reader
|
|||
}
|
||||
|
||||
///Get a string describing current stream position, used for error messages.
|
||||
@property Mark mark() const {return Mark(line_, column_);}
|
||||
@property final Mark mark() const {return Mark(line_, column_);}
|
||||
|
||||
///Get current line number.
|
||||
@property uint line() const {return line_;}
|
||||
@property final uint line() const {return line_;}
|
||||
|
||||
///Get current line number.
|
||||
@property uint column() const {return column_;}
|
||||
///Get current column number.
|
||||
@property final uint column() const {return column_;}
|
||||
|
||||
///Get index of the current character in the stream.
|
||||
@property size_t charIndex() const {return charIndex_;}
|
||||
@property final size_t charIndex() const {return charIndex_;}
|
||||
|
||||
///Get encoding of the input stream.
|
||||
@property Encoding encoding() const {return encoding_;}
|
||||
@property final Encoding encoding() const {return decoder_.encoding;}
|
||||
|
||||
private:
|
||||
/**
|
||||
|
@ -296,7 +245,7 @@ final class Reader
|
|||
*/
|
||||
void updateBuffer(in size_t length)
|
||||
{
|
||||
//get rid of unneeded data in the buffer
|
||||
//Get rid of unneeded data in the buffer.
|
||||
if(bufferOffset_ > 0)
|
||||
{
|
||||
size_t bufferLength = buffer_.length - bufferOffset_;
|
||||
|
@ -306,12 +255,12 @@ final class Reader
|
|||
bufferOffset_ = 0;
|
||||
}
|
||||
|
||||
////Load chars in batches of at most 1024 bytes (256 chars)
|
||||
//Load chars in batches of at most 1024 bytes (256 chars)
|
||||
while(buffer_.length <= bufferOffset_ + length)
|
||||
{
|
||||
loadChars(256);
|
||||
loadChars(512);
|
||||
|
||||
if(done)
|
||||
if(decoder_.done)
|
||||
{
|
||||
if(buffer_.length == 0 || buffer_[$ - 1] != '\0')
|
||||
{
|
||||
|
@ -325,9 +274,11 @@ final class Reader
|
|||
}
|
||||
|
||||
/**
|
||||
* Load at most specified number of characters.
|
||||
* Load more characters to the buffer.
|
||||
*
|
||||
* Params: chars = Maximum number of characters to load.
|
||||
* Params: chars = Recommended number of characters to load.
|
||||
* More characters might be loaded.
|
||||
* Less will be loaded if not enough available.
|
||||
*
|
||||
* Throws: ReaderException on Unicode decoding error,
|
||||
* if nonprintable characters are detected, or
|
||||
|
@ -335,96 +286,35 @@ final class Reader
|
|||
*/
|
||||
void loadChars(size_t chars)
|
||||
{
|
||||
///Get next character from the stream.
|
||||
dchar getDChar()
|
||||
{
|
||||
final switch(encoding_)
|
||||
{
|
||||
case Encoding.UTF_8:
|
||||
//Temp buffer for moving data in rawBuffer8_.
|
||||
char[bufferLength8_] temp;
|
||||
//Shortcut for ASCII.
|
||||
if(rawUsed_ > 0 && rawBuffer8_[0] < 128)
|
||||
{
|
||||
//Get the first byte (one char in ASCII).
|
||||
const dchar result = rawBuffer8_[0];
|
||||
--rawUsed_;
|
||||
//Move the data.
|
||||
*(cast(ulong*)temp.ptr) = *(cast(ulong*)(rawBuffer8_.ptr + 1));
|
||||
*(cast(ulong*)rawBuffer8_.ptr) = *(cast(ulong*)temp.ptr);
|
||||
return result;
|
||||
}
|
||||
|
||||
//Bytes to read.
|
||||
const readBytes = min(available_, bufferLength8_ - rawUsed_);
|
||||
available_ -= readBytes;
|
||||
//Length of data in rawBuffer8_ after reading.
|
||||
const len = rawUsed_ + readBytes;
|
||||
//Read the data.
|
||||
stream_.readExact(rawBuffer8_.ptr + rawUsed_, readBytes);
|
||||
|
||||
//After decoding, this will point to the first byte not decoded.
|
||||
size_t idx = 0;
|
||||
const dchar result = decode(rawBuffer8_, idx);
|
||||
rawUsed_ = cast(uint)(len - idx);
|
||||
|
||||
//Move the data.
|
||||
temp[0 .. rawUsed_] = rawBuffer8_[idx .. len];
|
||||
rawBuffer8_[0 .. rawUsed_] = temp[0 .. rawUsed_];
|
||||
return result;
|
||||
case Encoding.UTF_16:
|
||||
//Temp buffer for moving data in rawBuffer8_.
|
||||
wchar[bufferLength16_] temp;
|
||||
//Words to read.
|
||||
size_t readWords = min(available_ / 2, bufferLength16_ - rawUsed_);
|
||||
available_ -= readWords * 2;
|
||||
//Length of data in rawBuffer16_ after reading.
|
||||
size_t len = rawUsed_;
|
||||
//Read the data.
|
||||
while(readWords > 0)
|
||||
{
|
||||
//Due to a bug in std.stream, we have to use getcw here.
|
||||
rawBuffer16_[len] = stream_.getcw();
|
||||
--readWords;
|
||||
++len;
|
||||
}
|
||||
|
||||
//After decoding, this will point to the first word not decoded.
|
||||
size_t idx = 0;
|
||||
const dchar result = decode(rawBuffer16_, idx);
|
||||
rawUsed_ = cast(uint)(len - idx);
|
||||
|
||||
//Move the data.
|
||||
temp[0 .. rawUsed_] = rawBuffer16_[idx .. len];
|
||||
rawBuffer16_[0 .. rawUsed_] = temp[0 .. rawUsed_];
|
||||
return result;
|
||||
case Encoding.UTF_32:
|
||||
dchar result;
|
||||
available_ -= 4;
|
||||
stream_.read(result);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
const oldLength = buffer_.length;
|
||||
const oldPosition = stream_.position;
|
||||
|
||||
//Preallocating memory to limit GC reallocations.
|
||||
|
||||
bufferReserve(buffer_.length + chars);
|
||||
buffer_ = bufferAllocated_[0 .. buffer_.length + chars];
|
||||
scope(exit)
|
||||
scope(success)
|
||||
{
|
||||
buffer_ = buffer_[0 .. $ - chars];
|
||||
enforce(printable(buffer_[oldLength .. $]),
|
||||
new ReaderException("Special unicode characters are not allowed"));
|
||||
}
|
||||
|
||||
try for(uint c = 0; chars; --chars, ++c)
|
||||
try for(size_t c = 0; chars && !decoder_.done;)
|
||||
{
|
||||
if(done){break;}
|
||||
buffer_[oldLength + c] = getDChar();
|
||||
const slice = decoder_.getDChars(chars);
|
||||
buffer_[oldLength + c .. oldLength + c + slice.length] = slice;
|
||||
c += slice.length;
|
||||
chars -= slice.length;
|
||||
}
|
||||
catch(Exception e)
|
||||
{
|
||||
handleLoadCharsException(e, oldPosition);
|
||||
}
|
||||
}
|
||||
|
||||
//Handle an exception thrown in loadChars method of any Reader.
|
||||
void handleLoadCharsException(Exception e, size_t oldPosition)
|
||||
{
|
||||
try{throw e;}
|
||||
catch(UtfException e)
|
||||
{
|
||||
const position = stream_.position;
|
||||
|
@ -437,94 +327,376 @@ final class Reader
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine if all characters in an array are printable.
|
||||
*
|
||||
* Params: chars = Characters to check.
|
||||
*
|
||||
* Returns: True if all the characters are printable, false otherwise.
|
||||
*/
|
||||
static bool printable(const ref dchar[] chars) pure
|
||||
//Code shared by loadEntireFile methods.
|
||||
void loadEntireFile_()
|
||||
{
|
||||
foreach(c; chars)
|
||||
const maxChars = decoder_.maxChars;
|
||||
bufferReserve(maxChars + 1);
|
||||
loadChars(maxChars);
|
||||
|
||||
if(buffer_.length == 0 || buffer_[$ - 1] != '\0')
|
||||
{
|
||||
if(!((c == 0x09 || c == 0x0A || c == 0x0D || c == 0x85) ||
|
||||
(c >= 0x20 && c <= 0x7E) ||
|
||||
(c >= 0xA0 && c <= '\uD7FF') ||
|
||||
(c >= '\uE000' && c <= '\uFFFD')))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
buffer_ = bufferAllocated_[0 .. buffer_.length + 1];
|
||||
buffer_[$ - 1] = '\0';
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
///Are we done reading?
|
||||
@property bool done() const
|
||||
{
|
||||
return (available_ == 0 &&
|
||||
((encoding_ == Encoding.UTF_8 && rawUsed_ == 0) ||
|
||||
(encoding_ == Encoding.UTF_16 && rawUsed_ == 0) ||
|
||||
encoding_ == Encoding.UTF_32));
|
||||
}
|
||||
|
||||
///Ensure there is space for at least capacity characters in bufferAllocated_.
|
||||
//Ensure there is space for at least capacity characters in bufferAllocated_.
|
||||
void bufferReserve(in size_t capacity)
|
||||
{
|
||||
if(bufferAllocated_.length >= capacity){return;}
|
||||
if(bufferAllocated_ !is null && bufferAllocated_.length >= capacity){return;}
|
||||
|
||||
auto newPtr = core.stdc.stdlib.realloc(bufferAllocated_.ptr,
|
||||
capacity * dchar.sizeof);
|
||||
bufferAllocated_ = (cast(dchar*)newPtr)[0 .. capacity];
|
||||
//Handle first allocation as well as reallocation.
|
||||
auto ptr = bufferAllocated_ !is null
|
||||
? realloc(bufferAllocated_.ptr, capacity * dchar.sizeof)
|
||||
: malloc(capacity * dchar.sizeof);
|
||||
bufferAllocated_ = (cast(dchar*)ptr)[0 .. capacity];
|
||||
buffer_ = bufferAllocated_[0 .. buffer_.length];
|
||||
}
|
||||
}
|
||||
|
||||
unittest
|
||||
{
|
||||
writeln("D:YAML reader endian unittest");
|
||||
void endian_test(ubyte[] data, Encoding encoding_expected, Endian endian_expected)
|
||||
private:
|
||||
|
||||
alias UTFBlockDecoder!512 UTFFastDecoder;
|
||||
|
||||
///Decodes streams to UTF-32 in blocks.
|
||||
struct UTFBlockDecoder(size_t bufferSize_) if (bufferSize_ % 2 == 0)
|
||||
{
|
||||
private:
|
||||
//UTF-8 codepoint strides (0xFF are codepoints that can't start a sequence).
|
||||
static immutable ubyte[256] utf8Stride =
|
||||
[
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
|
||||
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
|
||||
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
|
||||
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
|
||||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
||||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
|
||||
4,4,4,4,4,4,4,4,5,5,5,5,6,6,0xFF,0xFF,
|
||||
];
|
||||
|
||||
//Encoding of the input stream.
|
||||
Encoding encoding_;
|
||||
//Maximum number of characters that might be in the stream.
|
||||
size_t maxChars_;
|
||||
//Bytes available in the stream.
|
||||
size_t available_;
|
||||
//Input stream.
|
||||
EndianStream stream_;
|
||||
|
||||
//Buffer used to store raw UTF-8 or UTF-16 code points.
|
||||
union
|
||||
{
|
||||
auto reader = new Reader(new MemoryStream(data));
|
||||
assert(reader.encoding_ == encoding_expected);
|
||||
assert(reader.stream_.endian == endian_expected);
|
||||
char[bufferSize_] rawBuffer8_;
|
||||
wchar[bufferSize_ / 2] rawBuffer16_;
|
||||
}
|
||||
ubyte[] little_endian_utf_16 = [0xFF, 0xFE, 0x7A, 0x00];
|
||||
ubyte[] big_endian_utf_16 = [0xFE, 0xFF, 0x00, 0x7A];
|
||||
endian_test(little_endian_utf_16, Encoding.UTF_16, Endian.littleEndian);
|
||||
endian_test(big_endian_utf_16, Encoding.UTF_16, Endian.bigEndian);
|
||||
}
|
||||
unittest
|
||||
//Used space (in items) in rawBuffer8_/rawBuffer16_.
|
||||
size_t rawUsed_;
|
||||
|
||||
//Space used by buffer_.
|
||||
dchar[bufferSize_] bufferSpace_;
|
||||
//Buffer of decoded, UTF-32 characters. This is a slice into bufferSpace_.
|
||||
dchar[] buffer_;
|
||||
|
||||
public:
|
||||
///Construct a UTFFastDecoder decoding a stream.
|
||||
this(EndianStream stream)
|
||||
{
|
||||
stream_ = stream;
|
||||
available_ = stream_.available;
|
||||
|
||||
//Handle files short enough not to have a BOM.
|
||||
if(available_ < 2)
|
||||
{
|
||||
encoding_ = Encoding.UTF_8;
|
||||
maxChars_ = 0;
|
||||
|
||||
if(available_ == 1)
|
||||
{
|
||||
bufferSpace_[0] = stream_.getc();
|
||||
buffer_ = bufferSpace_[0 .. 1];
|
||||
maxChars_ = 1;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
char[] rawBuffer8;
|
||||
wchar[] rawBuffer16;
|
||||
//readBOM will determine and set stream endianness.
|
||||
switch(stream_.readBOM(2))
|
||||
{
|
||||
case -1:
|
||||
//readBOM() eats two more bytes in this case so get them back.
|
||||
const wchar bytes = stream_.getcw();
|
||||
rawBuffer8_[0 .. 2] = [cast(ubyte)(bytes % 256), cast(ubyte)(bytes / 256)];
|
||||
rawUsed_ = 2;
|
||||
goto case 0;
|
||||
case 0:
|
||||
maxChars_ = available_;
|
||||
encoding_ = Encoding.UTF_8;
|
||||
break;
|
||||
case 1, 2:
|
||||
maxChars_ = available_ / 2;
|
||||
//readBOM() eats two more bytes in this case so get them back.
|
||||
encoding_ = Encoding.UTF_16;
|
||||
rawBuffer16_[0] = stream_.getcw();
|
||||
rawUsed_ = 1;
|
||||
enforce(available_ % 2 == 0,
|
||||
new ReaderException("Odd byte count in an UTF-16 stream"));
|
||||
break;
|
||||
case 3, 4:
|
||||
maxChars_ = available_ / 4;
|
||||
encoding_ = Encoding.UTF_32;
|
||||
                    enforce(available_ % 4 == 0,
                            new ReaderException("Byte count in an UTF-32 stream not divisible by 4"));
                    break;
                default: assert(false, "Unknown UTF BOM");
            }
            available_ = stream_.available;
        }

        ///Get maximum number of characters that might be in the stream.
        @property size_t maxChars() const {return maxChars_;}

        ///Get encoding we're decoding from.
        @property Encoding encoding() const {return encoding_;}

        ///Are we done decoding?
        @property bool done() const
        {
            return rawUsed_ == 0 && buffer_.length == 0 && available_ == 0;
        }

        ///Get next character.
        dchar getDChar()
        {
            if(buffer_.length)
            {
                const result = buffer_[0];
                buffer_ = buffer_[1 .. $];
                return result;
            }

            assert(available_ > 0 || rawUsed_ > 0);
            updateBuffer();
            return getDChar();
        }

        ///Get as many characters as possible, but at most maxChars. Slice returned will be invalidated in further calls.
        const(dchar[]) getDChars(size_t maxChars = size_t.max)
        {
            if(buffer_.length)
            {
                const slice = min(buffer_.length, maxChars);
                const result = buffer_[0 .. slice];
                buffer_ = buffer_[slice .. $];
                return result;
            }

            assert(available_ > 0 || rawUsed_ > 0);
            updateBuffer();
            return getDChars(maxChars);
        }

    private:
        //Read and decode characters from file and store them in the buffer.
        void updateBuffer()
        {
            assert(buffer_.length == 0);
            final switch(encoding_)
            {
                case Encoding.UTF_8:
                    const bytes = min(bufferSize_ - rawUsed_, available_);
                    //Current length of valid data in rawBuffer8_.
                    const rawLength = rawUsed_ + bytes;
                    stream_.readExact(rawBuffer8_.ptr + rawUsed_, bytes);
                    available_ -= bytes;
                    decodeRawBuffer(rawBuffer8_, rawLength);
                    break;

                case Encoding.UTF_16:
                    const words = min((bufferSize_ / 2) - rawUsed_, available_ / 2);
                    //Current length of valid data in rawBuffer16_.
                    const rawLength = rawUsed_ + words;
                    foreach(c; rawUsed_ .. rawLength)
                    {
                        stream_.read(rawBuffer16_[c]);
                        available_ -= 2;
                    }
                    decodeRawBuffer(rawBuffer16_, rawLength);
                    break;

                case Encoding.UTF_32:
                    const chars = min(bufferSize_ / 4, available_ / 4);
                    foreach(c; 0 .. chars)
                    {
                        stream_.read(bufferSpace_[c]);
                        available_ -= 4;
                    }
                    buffer_ = bufferSpace_[0 .. chars];
                    break;
            }
        }

        //Decode contents of a UTF-8 or UTF-16 raw buffer.
        void decodeRawBuffer(C)(C[] buffer, const size_t length)
        {
            //End of part of rawBuffer8_ that contains
            //complete characters and can be decoded.
            const end = endOfLastUTFSequence(buffer, length);
            //If end is 0, there are no full UTF-8 chars.
            //This can happen at the end of file if there is an incomplete UTF-8 sequence.
            enforce(end > 0,
                    new ReaderException("Invalid UTF-8 character at the end of stream"));

            decodeUTF(buffer[0 .. end]);

            //After decoding, any code points not decoded go to the start of raw buffer.
            rawUsed_ = length - end;
            foreach(i; 0 .. rawUsed_){buffer[i] = buffer[i + end];}
        }

        //Determine the end of last UTF-8 or UTF-16 sequence in a raw buffer.
        size_t endOfLastUTFSequence(C)(const C[] buffer, const size_t max)
        {
            static if(is(C == char))
            {
                for(long end = max - 1; end >= 0; --end)
                {
                    const s = utf8Stride[buffer[end]];
                    if(s != 0xFF)
                    {
                        //If stride goes beyond end of the buffer (max), return end.
                        //Otherwise the last sequence ends at max, so we can return that.
                        //(Unless there is an invalid code point, which is
                        //caught at decoding)
                        return (s > max - end) ? cast(size_t)end : max;
                    }
                }
                return 0;
            }
            else
            {
                size_t end = 0;
                while(end < max)
                {
                    const s = stride(buffer, end);
                    if(s + end > max){break;}
                    end += s;
                }
                return end;
            }
        }

        //Decode a UTF-8 or UTF-16 buffer (with no incomplete sequences at the end).
        void decodeUTF(C)(const C[] source)
        {
            size_t bufpos = 0;
            const srclength = source.length;
            for(size_t srcpos = 0; srcpos < srclength;)
            {
                const c = source[srcpos];
                if(c < 0x80)
                {
                    bufferSpace_[bufpos++] = c;
                    ++srcpos;
                }
                else
                {
                    bufferSpace_[bufpos++] = decode(source, srcpos);
                }
            }
            buffer_ = bufferSpace_[0 .. bufpos];
        }
}

/**
 * Determine if all characters in an array are printable.
 *
 * Params:  chars = Characters to check.
 *
 * Returns: True if all the characters are printable, false otherwise.
 */
bool printable(const ref dchar[] chars) pure
{
    foreach(c; chars)
    {
        if(!((c == 0x09 || c == 0x0A || c == 0x0D || c == 0x85) ||
             (c >= 0x20 && c <= 0x7E) ||
             (c >= 0xA0 && c <= '\uD7FF') ||
             (c >= '\uE000' && c <= '\uFFFD')))
        {
            return false;
        }
    }
    return true;
}
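
A short usage sketch (added for illustration, not part of the commit): printable() accepts tab, LF, CR, NEL and the printable ranges of the Basic Multilingual Plane, and rejects other control characters.

    //Illustrative only -- exercising the printable() helper defined above.
    dchar[] ok  = "key: value\n"d.dup;
    dchar[] bad = [cast(dchar)0x07];  //BEL falls outside the allowed ranges
    assert(printable(ok));
    assert(!printable(bad));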

//Unittests.

void testEndian(R)()
{
    writeln(typeid(R).toString() ~ ": endian unittest");
    void endian_test(ubyte[] data, Encoding encoding_expected, Endian endian_expected)
    {
        Reader reader = new R(new MemoryStream(data));
        assert(reader.encoding == encoding_expected);
        assert(reader.stream_.endian == endian_expected);
    }
    ubyte[] little_endian_utf_16 = [0xFF, 0xFE, 0x7A, 0x00];
    ubyte[] big_endian_utf_16 = [0xFE, 0xFF, 0x00, 0x7A];
    endian_test(little_endian_utf_16, Encoding.UTF_16, Endian.littleEndian);
    endian_test(big_endian_utf_16, Encoding.UTF_16, Endian.bigEndian);
}

void testPeekPrefixForward(R)()
{
    writeln(typeid(R).toString() ~ ": peek/prefix/forward unittest");
    ubyte[] data = ByteOrderMarks[BOM.UTF8] ~ cast(ubyte[])"data";
    Reader reader = new R(new MemoryStream(data));
    assert(reader.peek() == 'd');
    assert(reader.peek(1) == 'a');
    assert(reader.peek(2) == 't');
    assert(reader.peek(3) == 'a');
    assert(reader.peek(4) == '\0');
    assert(reader.prefix(4) == "data");
    assert(reader.prefix(6) == "data\0");
    reader.forward(2);
    assert(reader.peek(1) == 'a');
    assert(collectException(reader.peek(3)));
}

void testUTF(R)()
{
    writeln(typeid(R).toString() ~ ": UTF formats unittest");
    dchar[] data = cast(dchar[])"data";
    void utf_test(T)(T[] data, BOM bom)
    {
        ubyte[] bytes = ByteOrderMarks[bom] ~
                        (cast(ubyte*)data.ptr)[0 .. data.length * T.sizeof];
        Reader reader = new R(new MemoryStream(bytes));
        assert(reader.peek() == 'd');
        assert(reader.peek(1) == 'a');
        assert(reader.peek(2) == 't');
        assert(reader.peek(3) == 'a');
        assert(reader.peek(4) == '\0');
        assert(reader.prefix(4) == "data");
        assert(reader.prefix(6) == "data\0");
        reader.forward(2);
        assert(reader.peek(1) == 'a');
        assert(collectException(reader.peek(3)));
    }
    utf_test!char(to!(char[])(data), BOM.UTF8);
    utf_test!wchar(to!(wchar[])(data), endian == Endian.bigEndian ? BOM.UTF16BE : BOM.UTF16LE);
    utf_test(data, endian == Endian.bigEndian ? BOM.UTF32BE : BOM.UTF32LE);
}

unittest
{
    testEndian!Reader();
    testPeekPrefixForward!Reader();
    testUTF!Reader();
}

@@ -65,7 +65,7 @@ final class Representer
         * disabled to use custom representer
         * functions for default types.
         */
        this(in bool useDefaultRepresenters = true)
        this(bool useDefaultRepresenters = true)
        {
            if(!useDefaultRepresenters){return;}
            addRepresenter!YAMLNull(&representNull);

@@ -87,13 +87,13 @@ final class Representer
        }

        ///Set default _style for scalars. Invalid means the _style is chosen automatically.
        @property void defaultScalarStyle(in ScalarStyle style)
        @property void defaultScalarStyle(ScalarStyle style)
        {
            defaultScalarStyle_ = style;
        }

        ///Set default _style for collections. Invalid means the _style is chosen automatically.
        @property void defaultCollectionStyle(in CollectionStyle style)
        @property void defaultCollectionStyle(CollectionStyle style)
        {
            defaultCollectionStyle_ = style;
        }

@@ -237,7 +237,7 @@ final class Representer
         * }
         * --------------------
         */
        Node representScalar(in string tag, string scalar,
        Node representScalar(string tag, string scalar,
                             ScalarStyle style = ScalarStyle.Invalid)
        {
            if(style == ScalarStyle.Invalid){style = defaultScalarStyle_;}

@@ -276,7 +276,7 @@ final class Representer
         * }
         * --------------------
         */
        Node representSequence(in string tag, Node[] sequence,
        Node representSequence(string tag, Node[] sequence,
                               CollectionStyle style = CollectionStyle.Invalid)
        {
            Node[] value;

@@ -335,7 +335,7 @@ final class Representer
         * }
         * --------------------
         */
        Node representMapping(in string tag, Node.Pair[] pairs,
        Node representMapping(string tag, Node.Pair[] pairs,
                              CollectionStyle style = CollectionStyle.Invalid)
        {
            Node.Pair[] value;

@@ -23,7 +23,7 @@ import dyaml.exception;
import dyaml.node;
import dyaml.resolver;
import dyaml.tag;
import dyaml.tagdirectives;
import dyaml.tagdirective;
import dyaml.token;


@@ -46,7 +46,7 @@ struct Serializer
        string YAMLVersion_;

        ///Tag directives to emit.
        TagDirectives tagDirectives_;
        TagDirective[] tagDirectives_;

        //TODO Use something with more deterministic memory usage.
        ///Nodes with assigned anchors.

@@ -70,7 +70,7 @@ struct Serializer
         */
        this(ref Emitter emitter, Resolver resolver, Encoding encoding,
             in bool explicitStart, in bool explicitEnd, string YAMLVersion,
             TagDirectives tagDirectives)
             TagDirective[] tagDirectives)
        {
            emitter_ = &emitter;
            resolver_ = resolver;

@@ -1,131 +0,0 @@

// Copyright Ferdinand Majerech 2011.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

///Shared object.
module dyaml.sharedobject;


/**
 * Mixin for shared objects (need a better name).
 *
 * This works as an index to a static array of type T. Any new object created is
 * checked for presence in the array to prevent duplication.
 *
 * This is useful for e.g. token/event data that rarely needs to be
 * stored (e.g. tag directives) to prevent inflation of these structs,
 * and when there are many instances of a data type that are mostly
 * duplicates (e.g. tags).
 *
 * This is not the most elegant way to store the extra data and might change in future.
 */
template SharedObject(T, MixedIn)
{
    private:
        ///This class stores the data that is shared between the objects.
        class SharedData
        {
            private:
                /**
                 * Reference count.
                 *
                 * When this reaches zero, objects_ are cleared. This is not
                 * the number of shared objects, but rather of objects using this kind
                 * of shared object.
                 */
                int referenceCount_ = 0;

                ///All known objects of type T are in this array.
                T[] objects_;

            public:
                ///Increment the reference count.
                void addReference()
                {
                    assert(referenceCount_ >= 0);
                    ++referenceCount_;
                }

                ///Decrement the reference count and clear the constructed objects if zero.
                void removeReference()
                {
                    --referenceCount_;
                    assert(referenceCount_ >= 0);
                    if(referenceCount_ == 0)
                    {
                        clear(objects_);
                        objects_ = [];
                    }
                }

                ///Add an object and return its index.
                uint add(ref T object)
                {
                    foreach(index, ref known; objects_) if(object == known)
                    {
                        return cast(uint)index;
                    }
                    objects_ ~= object;
                    return cast(uint)objects_.length - 1;
                }

                ///Get the object at the specified index.
                @property T get(in uint index)
                {
                    return objects_[index];
                }
        }

        ///Index of the object in data_.
        uint index_ = uint.max;

        ///Stores the actual objects.
        static __gshared SharedData data_;

        static this()
        {
            data_ = new SharedData;
        }

    public:
        ///Increment the reference count.
        static void addReference()
        {
            synchronized(data_){data_.addReference();}
        }

        ///Decrement the reference count and clear the constructed objects if zero.
        static void removeReference()
        {
            synchronized(data_){data_.removeReference();}
        }

        ///Get the object.
        @property T get() const
        in{assert(!isNull());}
        body
        {
            T result;
            synchronized(data_){result = data_.get(index_);}
            return result;
        }

        ///Test for equality with another object.
        bool opEquals(const ref MixedIn object) const
        {
            return object.index_ == index_;
        }

        ///Is this object null (invalid)?
        @property bool isNull() const {return index_ == uint.max;}

    private:
        ///Add a new object, checking if an identical object already exists.
        void add(ref T object)
        {
            synchronized(data_){index_ = data_.add(object);}
        }
}

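For context, a hedged sketch of how this removed mixin was typically instantiated; SharedString is a hypothetical example type (the removed TagDirectives struct further below shows the real pattern from the old code).

    //Hypothetical instantiation of the removed SharedObject mixin, for illustration only.
    struct SharedString
    {
        public:
            mixin SharedObject!(string, SharedString);

            ///Construct from a string, deduplicating against previously seen values.
            this(string value)
            {
                add(value);
            }
    }
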
dyaml/tag.d
@@ -7,66 +7,7 @@
///YAML tag.
module dyaml.tag;

import dyaml.zerostring;

import core.stdc.string;


///YAML tag (data type) struct. Encapsulates a tag to save memory and speed-up comparison.
struct Tag
{
    private:
        ///Zero terminated tag string.
        immutable(char)* tag_ = null;

    public:
        @disable int opCmp(ref Tag);


        ///Construct a tag from a string representation.
        this(in string tag)
        {
            if(tag is null || tag == "")
            {
                tag_ = null;
                return;
            }

            tag_ = (tag ~ '\0').ptr;
        }

        ///Get the tag string.
        @property string get() const
        in{assert(!isNull());}
        body
        {
            return cast(string)tag_[0 .. strlen(tag_)];
        }

        ///Test for equality with another tag.
        bool opEquals(const ref Tag tag) const
        {
            return isNull ? tag.isNull :
                   tag.isNull ? false : (0 == strcmp(tag_, tag.tag_));
        }

        ///Compute a hash.
        hash_t toHash() const
        in{assert(!isNull);}
        body
        {
            static type = typeid(string);
            auto str = get();
            return type.getHash(&str);
        }

        ///Compare with another tag.
        int opCmp(const ref Tag tag) const
        in{assert(!isNull && !tag.isNull);}
        body
        {
            return strcmp(tag_, tag.tag_);
        }

        ///Is this tag null (invalid)?
        @property bool isNull() const {return tag_ is null;}
}
///YAML tag (data type) struct. Encapsulates a tag to save memory and speed up comparison.
alias ZeroString!"Tag" Tag;

dyaml/tagdirective.d (new file)
@@ -0,0 +1,15 @@

// Copyright Ferdinand Majerech 2011.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

///Tag directives.
module dyaml.tagdirective;

///Single tag directive. handle is the shortcut, prefix is the prefix that replaces it.
struct TagDirective
{
    string handle;
    string prefix;
}

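As a hedged illustration (not part of the commit): a YAML directive such as %TAG !e! tag:example.com,2011: would map onto this struct as follows.

    //Illustrative only -- the handle (shortcut) and the prefix it expands to.
    auto directive = TagDirective("!e!", "tag:example.com,2011:");
    assert(directive.handle == "!e!");
    assert(directive.prefix == "tag:example.com,2011:");
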
@@ -1,28 +0,0 @@

// Copyright Ferdinand Majerech 2011.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

///Tag directives.
module dyaml.tagdirectives;

import std.typecons;

import dyaml.sharedobject;

///Single tag directive. handle is the shortcut, prefix is the prefix that replaces it.
alias Tuple!(string, "handle", string, "prefix") tagDirective;

///Tag directives stored in Event.
struct TagDirectives
{
    public:
        mixin SharedObject!(tagDirective[], TagDirectives);

        ///Construct a tags object from an array of tag directives.
        this(tagDirective[] tagDirectives)
        {
            add(tagDirectives);
        }
}

dyaml/zerostring.d (new file)
@@ -0,0 +1,73 @@

// Copyright Ferdinand Majerech 2011.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

///Zero terminated string.
module dyaml.zerostring;

import core.stdc.string;

/**
 * Zero terminated string used to decrease data structure size.
 *
 * TypeName is used to differentiate types (better than simple alias).
 */
struct ZeroString(string TypeName)
{
    private:
        ///Zero terminated string.
        immutable(char)* str_ = null;

    public:
        @disable int opCmp(ref ZeroString);

        ///Construct a string.
        this(in string str)
        {
            if(str is null || str == "")
            {
                str_ = null;
                return;
            }

            str_ = (str ~ '\0').ptr;
        }

        ///Get the string.
        @property string get() const
        in{assert(!isNull());}
        body
        {
            return cast(string)str_[0 .. strlen(str_)];
        }

        ///Test for equality with another string.
        bool opEquals(const ref ZeroString str) const
        {
            return isNull ? str.isNull :
                   str.isNull ? false : (0 == strcmp(str_, str.str_));
        }

        ///Compute a hash.
        hash_t toHash() const
        in{assert(!isNull);}
        body
        {
            static type = typeid(string);
            auto str = get();
            return type.getHash(&str);
        }

        ///Compare with another string.
        int opCmp(const ref ZeroString str) const
        in{assert(!isNull && !str.isNull);}
        body
        {
            return strcmp(str_, str.str_);
        }

        ///Is this string null (invalid)?
        @property bool isNull() const {return str_ is null;}
}

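A hedged sketch of intended use; the Handle alias below is hypothetical, mirroring the real Tag alias added in dyaml/tag.d above.

    //Each ZeroString instantiation is a distinct type thanks to the TypeName parameter.
    alias ZeroString!"Handle" Handle;   //hypothetical instantiation, for illustration only
    auto a = Handle("!e!");
    auto b = Handle("!e!");
    assert(a == b);                     //compared via strcmp on the zero terminated data
    assert(a.get() == "!e!");
    assert(Handle().isNull);            //a default-initialized value is null/invalid
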
@@ -42,8 +42,8 @@ void testParser(bool verbose, string dataFilename, string canonicalFilename)
 */
void testLoader(bool verbose, string dataFilename, string canonicalFilename)
{
    auto data = Loader(dataFilename).loadAll;
    auto canonical = Loader(canonicalFilename).loadAll;
    auto data = Loader(dataFilename).loadAll();
    auto canonical = Loader(canonicalFilename).loadAll();

    assert(data.length == canonical.length, "Unequal node count");
    foreach(n; 0 .. data.length)

@@ -58,7 +58,7 @@ void testLoader(bool verbose, string dataFilename, string canonicalFilename)
                writeln("Canonical value:");
                writeln(canonical[n].debugString);
            }
            assert(false);
            assert(false, "testLoader(" ~ dataFilename ~ ", " ~ canonicalFilename ~ ") failed");
        }
    }
}

@@ -83,7 +83,7 @@ void testEmitterOnData(bool verbose, string dataFilename, string canonicalFilena
{
    //Must exist due to Anchor, Tags reference counts.
    auto loader = Loader(dataFilename);
    auto events = loader.parse();
    auto events = cast(Event[])loader.parse();
    auto emitStream = new MemoryStream;
    Dumper(emitStream).emit(events);


@@ -98,7 +98,7 @@ void testEmitterOnData(bool verbose, string dataFilename, string canonicalFilena
    loader2.name = "TEST";
    loader2.constructor = new Constructor;
    loader2.resolver = new Resolver;
    auto newEvents = loader2.parse();
    auto newEvents = cast(Event[])loader2.parse();
    assert(compareEvents(events, newEvents));
}


@@ -114,7 +114,7 @@ void testEmitterOnCanonical(bool verbose, string canonicalFilename)
{
    //Must exist due to Anchor, Tags reference counts.
    auto loader = Loader(canonicalFilename);
    auto events = loader.parse();
    auto events = cast(Event[])loader.parse();
    foreach(canonical; [false, true])
    {
        auto emitStream = new MemoryStream;

@@ -130,7 +130,7 @@ void testEmitterOnCanonical(bool verbose, string canonicalFilename)
        loader2.name = "TEST";
        loader2.constructor = new Constructor;
        loader2.resolver = new Resolver;
        auto newEvents = loader2.parse();
        auto newEvents = cast(Event[])loader2.parse();
        assert(compareEvents(events, newEvents));
    }
}

@@ -151,7 +151,7 @@ void testEmitterStyles(bool verbose, string dataFilename, string canonicalFilena
{
    //must exist due to Anchor, Tags reference counts
    auto loader = Loader(canonicalFilename);
    auto events = loader.parse();
    auto events = cast(Event[])loader.parse();
    foreach(flowStyle; [CollectionStyle.Block, CollectionStyle.Flow])
    {
        foreach(style; [ScalarStyle.Literal, ScalarStyle.Folded,

@@ -191,7 +191,7 @@ void testEmitterStyles(bool verbose, string dataFilename, string canonicalFilena
            loader2.name = "TEST";
            loader2.constructor = new Constructor;
            loader2.resolver = new Resolver;
            auto newEvents = loader2.parse();
            auto newEvents = cast(Event[])loader2.parse();
            assert(compareEvents(events, newEvents));
        }
    }

@@ -72,7 +72,8 @@ void testLoaderErrorFilename(bool verbose, string errorFilename)
        if(verbose){writeln(typeid(e).toString(), "\n", e);}
        return;
    }
    assert(false, "Expected an exception");
    assert(false, "testLoaderErrorSingle(" ~ to!string(verbose) ~
           ", " ~ errorFilename ~ ") Expected an exception");
}

/**