From 62f7e2e4df6af7a10d1450a89e0911b99340da64 Mon Sep 17 00:00:00 2001
From: Ferdinand Majerech <kiithsacmp@gmail.com>
Date: Tue, 25 Oct 2011 20:23:44 +0200
Subject: [PATCH] Optimized dumping performance. Dumping is now about half as
 fast as loading. Also slightly improved loading performance. Greatly improved
 performance with very small files. We can now load 10000 24-byte files in a
 second, and dump them at the same speed. Fixed another FastCharSearch bug.

---
 dyaml/dumper.d         | 16 +++++++++--
 dyaml/emitter.d        | 65 ++++++++++++++++++++++++++----------------
 dyaml/fastcharsearch.d |  2 +-
 dyaml/loader.d         | 16 +++++++++--
 dyaml/reader.d         | 21 +++++++++-----
 dyaml/resolver.d       | 28 ++++++++----------
 dyaml/serializer.d     |  4 +--
 7 files changed, 98 insertions(+), 54 deletions(-)

diff --git a/dyaml/dumper.d b/dyaml/dumper.d
index 735e7dc..8078637 100644
--- a/dyaml/dumper.d
+++ b/dyaml/dumper.d
@@ -113,6 +113,18 @@ struct Dumper
     }
 
     private:
+        ///Shared Resolver instance that this(Stream) assigns to resolver_ (not allocated per Dumper).
+        static Resolver defaultResolver_;
+        ///Shared Representer instance that this(Stream) assigns to representer_ (not allocated per Dumper).
+        static Representer defaultRepresenter_;
+
+        static this()
+        {
+            defaultResolver_ = new Resolver;
+            defaultRepresenter_ = new Representer;
+        }
+
+
         ///Resolver to resolve tags.
         Resolver resolver_;
         ///Representer to represent data types.
@@ -167,8 +179,8 @@ struct Dumper
         ///Construct a Dumper writing to a _stream. This is useful to e.g. write to memory.
         this(Stream stream)
         {
-            resolver_ = new Resolver();
-            representer_ = new Representer();
+            resolver_ = defaultResolver_;
+            representer_ = defaultRepresenter_;
             stream_ = stream;
             Anchor.addReference();
             TagDirectives.addReference();
diff --git a/dyaml/emitter.d b/dyaml/emitter.d
index 7be48f1..7b05b03 100644
--- a/dyaml/emitter.d
+++ b/dyaml/emitter.d
@@ -30,6 +30,7 @@ import dyaml.encoding;
 import dyaml.escapes;
 import dyaml.event;
 import dyaml.exception;
+import dyaml.fastcharsearch;
 import dyaml.flags;
 import dyaml.linebreak;
 import dyaml.queue;
@@ -63,6 +64,9 @@ align(4) struct ScalarAnalysis
            "allowSingleQuoted", "allowDoubleQuoted", "allowBlock", "isNull") flags;
 }
 
+///Fast lookup for the newline characters '\n', U+0085, U+2028 and U+2029 ('\r' is not included); used by Emitter and ScalarWriter.
+private mixin FastCharSearch!"\n\u0085\u2028\u2029"d newlineSearch_;
+
 //Emits YAML events into a file/stream.
 struct Emitter 
 {
@@ -365,9 +369,12 @@ struct Emitter
 
                 bool eq(ref tagDirective a, ref tagDirective b){return a.handle == b.handle;}
                 //Add any default tag directives that have not been overriden.
-                foreach(ref def; defaultTagDirectives_) if(!canFind!eq(tagDirectives_, def))
+                foreach(ref def; defaultTagDirectives_) 
                 {
-                    tagDirectives_ ~= def;
+                    if(!std.algorithm.canFind!eq(tagDirectives_, def))
+                    {
+                        tagDirectives_ ~= def;
+                    } 
                 }
 
                 const implicit = first && !event_.explicitDocument && !canonical_ &&
@@ -1017,15 +1024,18 @@ struct Emitter
 
             foreach(const size_t index, const dchar c; scalar)
             {
+                mixin FastCharSearch!("#,[]{}&*!|>\'\"%@`"d, 128) specialCharSearch;
+                mixin FastCharSearch!(",?[]{}"d, 128) flowIndicatorSearch;
+
                 //Check for indicators.
                 if(index == 0)
                 {
                     //Leading indicators are special characters.
-                    if("#,[]{}&*!|>\'\"%@`"d.canFind(c))
+                    if(specialCharSearch.canFind(c))
                     {
                         flowIndicators = blockIndicators = true;
                     }
-                    if("?:"d.canFind(c))
+                    if(':' == c || '?' == c)
                     {
                         flowIndicators = true;
                         if(followedByWhitespace){blockIndicators = true;}
@@ -1038,7 +1048,7 @@ struct Emitter
                 else
                 {
                     //Some indicators cannot appear within a scalar as well.
-                    if(",?[]{}"d.canFind(c)){flowIndicators = true;}
+                    if(flowIndicatorSearch.canFind(c)){flowIndicators = true;}
                     if(c == ':')
                     {
                         flowIndicators = true;
@@ -1051,7 +1061,7 @@ struct Emitter
                 }
 
                 //Check for line breaks, special, and unicode characters.
-                if("\n\u0085\u2028\u2029"d.canFind(c)){lineBreaks = true;}
+                if(newlineSearch_.canFind(c)){lineBreaks = true;}
                 if(!(c == '\n' || (c >= '\x20' && c <= '\x7E')) &&
                    !((c == '\u0085' || (c >= '\xA0' && c <= '\uD7FF') ||
                      (c >= '\uE000' && c <= '\uFFFD')) && c != '\uFEFF'))
@@ -1068,7 +1078,7 @@ struct Emitter
                     previousSpace = true;
                     previousBreak = false;
                 }
-                else if("\n\u0085\u2028\u2029"d.canFind(c))
+                else if(newlineSearch_.canFind(c))
                 {
                     if(index == 0){leadingBreak = true;}
                     if(index == scalar.length - 1){trailingBreak = true;}
@@ -1081,10 +1091,11 @@ struct Emitter
                     previousSpace = previousBreak = false;
                 }
 
+                mixin FastCharSearch! "\0\n\r\u0085\u2028\u2029 \t"d spaceSearch;
                 //Prepare for the next character.
-                preceededByWhitespace = "\0\n\r\u0085\u2028\u2029 \t"d.canFind(c);
+                preceededByWhitespace = spaceSearch.canFind(c);
                 followedByWhitespace = index + 2 >= scalar.length || 
-                                       "\0\n\r\u0085\u2028\u2029 \t"d.canFind(scalar[index + 2]);
+                                       spaceSearch.canFind(scalar[index + 2]);
             }
 
             with(analysis.flags)
@@ -1310,14 +1321,14 @@ struct ScalarWriter
                 }
                 else if(breaks_)
                 {
-                    if(!"\n\u0085\u2028\u2029"d.canFind(c))
+                    if(!newlineSearch_.canFind(c))
                     {
                         writeStartLineBreak();
                         writeLineBreaks();
                         emitter_.writeIndent();
                     }
                 }
-                else if((c == dcharNone || "\' \n\u0085\u2028\u2029"d.canFind(c))
+                else if((c == dcharNone || "\' "d.canFind(c) || newlineSearch_.canFind(c))
                         && startChar_ < endChar_)
                 {
                     writeCurrentRange(Flag!"UpdateColumn".yes);
@@ -1410,7 +1421,7 @@ struct ScalarWriter
                 const dchar c = nextChar();
                 if(breaks_)
                 {
-                    if(!"\n\u0085\u2028\u2029"d.canFind(c))
+                    if(!newlineSearch_.canFind(c))
                     {
                         if(!leadingSpace && c != dcharNone && c != ' ')
                         {
@@ -1433,7 +1444,7 @@ struct ScalarWriter
                         writeCurrentRange(Flag!"UpdateColumn".yes);
                     }
                 }
-                else if(c == dcharNone || " \n\u0085\u2028\u2029"d.canFind(c))
+                else if(c == dcharNone || newlineSearch_.canFind(c) || c == ' ')
                 {
                     writeCurrentRange(Flag!"UpdateColumn".yes);
                     if(c == dcharNone){emitter_.writeLineBreak();}
@@ -1454,13 +1465,13 @@ struct ScalarWriter
                 const dchar c = nextChar();
                 if(breaks_)
                 {
-                    if(!"\n\u0085\u2028\u2029"d.canFind(c))
+                    if(!newlineSearch_.canFind(c))
                     {
                         writeLineBreaks();
                         if(c != dcharNone){emitter_.writeIndent();}
                     }
                 }
-                else if(c == dcharNone || "\n\u0085\u2028\u2029"d.canFind(c))
+                else if(c == dcharNone || newlineSearch_.canFind(c))
                 {
                     writeCurrentRange(Flag!"UpdateColumn".no);
                     if(c == dcharNone){emitter_.writeLineBreak();}
@@ -1500,14 +1511,14 @@ struct ScalarWriter
                 }
                 else if(breaks_)
                 {
-                    if(!"\n\u0085\u2028\u2029"d.canFind(c))
+                    if(!newlineSearch_.canFind(c))
                     {
                         writeStartLineBreak();
                         writeLineBreaks();
                         writeIndent(Flag!"ResetSpace".yes);
                     }
                 }
-                else if(c == dcharNone || " \n\u0085\u2028\u2029"d.canFind(c))
+                else if(c == dcharNone || newlineSearch_.canFind(c) || c == ' ')
                 {
                     writeCurrentRange(Flag!"UpdateColumn".yes);
                 }
@@ -1521,8 +1532,15 @@ struct ScalarWriter
         {
             ++endChar_;
             endByte_ = nextEndByte_;
-            return endByte_ < text_.length ? decode(text_, nextEndByte_) 
-                                           : dcharNone;
+            if(endByte_ >= text_.length){return dcharNone;}
+            const c = text_[nextEndByte_];
+            //c is ascii, no need to decode.
+            if(c < 0x80)
+            {
+                ++nextEndByte_;
+                return c;
+            }
+            return decode(text_, nextEndByte_);
         }
 
         ///Get character at start of the text range.
@@ -1555,16 +1573,15 @@ struct ScalarWriter
             const last = lastChar(text_, end);
             const secondLast = end > 0 ? lastChar(text_, end) : 0;
 
-            if(" \n\u0085\u2028\u2029"d.canFind(text_[0]))
+            if(newlineSearch_.canFind(text_[0]) || text_[0] == ' ')
             {
                 hints[hintsIdx++] = cast(char)('0' + bestIndent);
             }
-            if(!"\n\u0085\u2028\u2029"d.canFind(last))
+            if(!newlineSearch_.canFind(last))
             {
                 hints[hintsIdx++] = '-';
             }
-            else if(std.utf.count(text_) == 1 || 
-                    "\n\u0085\u2028\u2029"d.canFind(secondLast))
+            else if(std.utf.count(text_) == 1 || newlineSearch_.canFind(secondLast))
             {
                 hints[hintsIdx++] = '+';
             }
@@ -1636,7 +1653,7 @@ struct ScalarWriter
         void updateBreaks(in dchar c, in Flag!"UpdateSpaces" updateSpaces)
         {
             if(c == dcharNone){return;}
-            breaks_ = "\n\u0085\u2028\u2029"d.canFind(c);
+            breaks_ = newlineSearch_.canFind(c);
             if(updateSpaces){spaces_ = c == ' ';}
         }
 
diff --git a/dyaml/fastcharsearch.d b/dyaml/fastcharsearch.d
index d992d80..617da0c 100644
--- a/dyaml/fastcharsearch.d
+++ b/dyaml/fastcharsearch.d
@@ -89,7 +89,7 @@ string searchCode(dstring chars, uint tableSize)()
 
     code ~= specialChars.length 
             ? "    return " ~ specialCharsCode() ~ ";\n"
-            : "    return false";
+            : "    return false;";
     code ~= "}\n";
 
     return code;
diff --git a/dyaml/loader.d b/dyaml/loader.d
index 2a67417..fb96811 100644
--- a/dyaml/loader.d
+++ b/dyaml/loader.d
@@ -90,6 +90,18 @@ import dyaml.token;
 struct Loader
 {
     private:
+        ///Shared Resolver instance a Loader's resolver_ is initialized to (not allocated per Loader).
+        static Resolver defaultResolver_;
+        ///Shared Constructor instance a Loader's constructor_ is initialized to (not allocated per Loader).
+        static Constructor defaultConstructor_;
+
+        static this()
+        {
+            defaultResolver_ = new Resolver;
+            defaultConstructor_ = new Constructor;
+        }
+
+
         ///Reads character data from a stream.
         Reader reader_;
         ///Processes character data to YAML tokens.
@@ -138,8 +150,8 @@ struct Loader
                 reader_      = new Reader(stream);
                 scanner_     = new Scanner(reader_);
                 parser_      = new Parser(scanner_);
-                resolver_    = new Resolver;
-                constructor_ = new Constructor;
+                resolver_    = defaultResolver_;
+                constructor_ = defaultConstructor_;
                 Anchor.addReference();
                 TagDirectives.addReference();
             }
diff --git a/dyaml/reader.d b/dyaml/reader.d
index 43bf0e3..63e6097 100644
--- a/dyaml/reader.d
+++ b/dyaml/reader.d
@@ -149,9 +149,11 @@ final class Reader
          */
         dchar peek(in size_t index = 0)
         {
-            updateBuffer(index + 1);
-
-            if(buffer_.length < bufferOffset_ + index + 1)
+            if(buffer_.length <= bufferOffset_ + index + 1)
+            {
+                updateBuffer(index + 1);
+            }
+            if(buffer_.length <= bufferOffset_ + index)
             {
                 throw new ReaderException("Trying to read past the end of the stream");
             }
@@ -175,7 +177,10 @@ final class Reader
         dstring prefix(in size_t length)
         {
             if(length == 0){return "";}
-            updateBuffer(length);
+            if(buffer_.length <= bufferOffset_ + length)
+            {
+                updateBuffer(length);
+            }
             const end = min(buffer_.length, bufferOffset_ + length);
             //need to duplicate as we change buffer content with C functions
             //and could end up with returned string referencing changed data
@@ -225,7 +230,11 @@ final class Reader
         void forward(size_t length = 1)
         {
             mixin FastCharSearch!"\n\u0085\u2028\u2029"d search;
-            updateBuffer(length + 1);
+
+            if(buffer_.length <= bufferOffset_ + length + 1)
+            {
+                updateBuffer(length + 1);
+            }
 
             while(length > 0)
             {
@@ -272,8 +281,6 @@ final class Reader
          */
         void updateBuffer(in size_t length)
         {
-            if(buffer_.length > bufferOffset_ + length){return;}
-
             //get rid of unneeded data in the buffer
             if(bufferOffset_ > 0)
             {
diff --git a/dyaml/resolver.d b/dyaml/resolver.d
index a466712..51f30f6 100644
--- a/dyaml/resolver.d
+++ b/dyaml/resolver.d
@@ -144,23 +144,19 @@ final class Resolver
 
             if(kind == NodeID.Scalar)
             {
-                if(implicit)
+                if(!implicit){return defaultScalarTag_;}
+
+                //Get the first char of the value.
+                size_t dummy;
+                const dchar first = value.length == 0 ? '\0' : decode(value, dummy);
+
+                auto resolvers = (first in yamlImplicitResolvers_) is null ? 
+                                 [] : yamlImplicitResolvers_[first];
+
+                //If regexp matches, return tag.
+                foreach(resolver; resolvers) if(!(match(value, resolver[1]).empty))
                 {
-                    //Get the first char of the value.
-                    size_t dummy;
-                    const dchar first = value.length == 0 ? '\0' : decode(value, dummy);
-
-                    auto resolvers = (first in yamlImplicitResolvers_) is null ? 
-                                     [] : yamlImplicitResolvers_[first];
-
-                    foreach(resolver; resolvers)
-                    {
-                        //If regexp matches, return tag.
-                        if(!(match(value, resolver[1]).empty))
-                        {
-                            return resolver[0];
-                        }
-                    }
+                    return resolver[0];
                 }
                 return defaultScalarTag_;
             }
diff --git a/dyaml/serializer.d b/dyaml/serializer.d
index 1977efc..451b5fb 100644
--- a/dyaml/serializer.d
+++ b/dyaml/serializer.d
@@ -194,8 +194,8 @@ struct Serializer
             {
                 assert(node.isType!string, "Scalar node type must be string before serialized");
                 auto value = node.as!string;
-                const Tag detectedTag = resolver_.resolve(NodeID.Scalar, Tag(null), value, true);
-                const Tag defaultTag = resolver_.resolve(NodeID.Scalar, Tag(null), value, false);
+                const detectedTag = resolver_.resolve(NodeID.Scalar, Tag(null), value, true);
+                const defaultTag = resolver_.resolve(NodeID.Scalar, Tag(null), value, false);
                 bool isDetected = node.tag_ == detectedTag;
                 bool isDefault = node.tag_ == defaultTag;