diff --git a/autoddoc.cfg b/autoddoc.cfg index ad307ad..15aea1f 100644 --- a/autoddoc.cfg +++ b/autoddoc.cfg @@ -29,7 +29,7 @@ links = ../index.html Documentation home # Source files or patterns to ignore. Supports regexp syntax. # E.g; To ignore main.d and all source files in the test/ directory, # you would use: "main.d test/*" -ignore = test/*, examples/*, docsrc/*, autoddoc/*, yaml.d, unittest.d, cdc.d, dyaml/composer.d, dyaml/event.d, dyaml/parser.d, dyaml/reader.d, dyaml/scanner.d, dyaml/token.d, dyaml/util.d, dyaml/anchor.d, dyaml/emitter.d, dyaml/flags.d, dyaml/serializer.d, dyaml/sharedobject.d, dyaml/tag.d, dyaml/tagdirectives.d, dyaml/queue.d, dyaml/escapes.d +ignore = test/*, examples/*, docsrc/*, autoddoc/*, yaml.d, unittest.d, cdc.d, dyaml/composer.d, dyaml/event.d, dyaml/parser.d, dyaml/reader.d, dyaml/scanner.d, dyaml/token.d, dyaml/util.d, dyaml/anchor.d, dyaml/emitter.d, dyaml/flags.d, dyaml/serializer.d, dyaml/sharedobject.d, dyaml/tag.d, dyaml/tagdirectives.d, dyaml/queue.d, dyaml/escapes.d, dyaml/fastcharsearch.d [DDOC] # Command to use to generate the documentation. diff --git a/dyaml/fastcharsearch.d b/dyaml/fastcharsearch.d new file mode 100644 index 0000000..3c45cc2 --- /dev/null +++ b/dyaml/fastcharsearch.d @@ -0,0 +1,102 @@ + +// Copyright Ferdinand Majerech 2011. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +module dyaml.fastcharsearch; + + +import std.algorithm; +import std.conv; + + +package: + +/** + * Mixin used for fast searching for a character in string. + * + * Creates a lookup table to quickly determine if a character + * is present in the string. Size of the lookup table is limited; + * any characters not represented in the table will be checked + * by ordinary equality comparison. + * + * Params: chars = String to search in. + * tableSize = Maximum number of bytes used by the table. 
+ * + * Generated method: + * bool canFind(dchar c) + * + * Determines if a character is in the string. + */ +template FastCharSearch(dstring chars, uint tableSize = 256) +{ + private mixin(searchCode!(chars, tableSize)()); +} + +///Generate the search table and the canFind method; canFind range-checks c before indexing the table. +string searchCode(dstring chars, uint tableSize)() +{ + const tableSizeStr = to!string(tableSize); + ubyte[tableSize] table; + table[] = 0; + + //Characters that don't fit in the table. + dchar[] specialChars; + + foreach(c; chars) + { + if(c < tableSize){table[c] = 1;} + else {specialChars ~= c;} + } + + string tableCode() + { + string code = "static immutable ubyte table_[" ~ tableSizeStr ~ "] = [\n"; + foreach(c; table[0 .. $ - 1]) + { + code ~= c ? "true,\n" : "false,\n"; + } + code ~= table[$ - 1] ? "true\n" : "false\n"; + code ~= "];\n\n"; + return code; + } + + string specialCharsCode() + { + string code; + foreach(c; specialChars[0 .. $ - 1]) + { + code ~= "cast(uint)c == " ~ to!string(cast(uint)c) ~ " || "; + } + code ~= "cast(uint)c == " ~ to!string(cast(uint)specialChars[$ - 1]); + + return code; + } + + string code = tableSize ? tableCode() : ""; + + code ~= "bool canFind(in dchar c) pure\n" + "{\n"; + + if(tableSize) + { + code ~= specialChars.length + ? + " if(c < " ~ tableSizeStr ~ ")\n" + " {\n" + " return cast(bool)table_[c];\n" + " }\n" + : + " return c < " ~ tableSizeStr ~ " && cast(bool)table_[c];\n"; + } + if(specialChars.length) + { + code ~= " return " ~ specialCharsCode() ~ ";\n"; + } + + code ~= " assert(false);\n" + "}\n"; + + return code; +} diff --git a/dyaml/reader.d b/dyaml/reader.d index 7687859..8e4e1a2 100644 --- a/dyaml/reader.d +++ b/dyaml/reader.d @@ -18,6 +18,7 @@ import std.string; import std.system; import std.utf; +import dyaml.fastcharsearch; import dyaml.encoding; import dyaml.exception; @@ -216,8 +217,7 @@ final class Reader */ void forward(size_t length = 1) { - //This is here due to optimization. 
- static newlines = "\n\u0085\u2028\u2029"d; + mixin FastCharSearch!"\n\u0085\u2028\u2029"d search; updateBuffer(length + 1); while(length > 0) @@ -226,7 +226,7 @@ final class Reader ++bufferOffset_; ++charIndex_; //New line. - if(newlines.canFind(c) || (c == '\r' && buffer_[bufferOffset_] != '\n')) + if(search.canFind(c) || (c == '\r' && buffer_[bufferOffset_] != '\n')) { ++line_; column_ = 0; diff --git a/dyaml/scanner.d b/dyaml/scanner.d index 99564d2..c9d34e3 100644 --- a/dyaml/scanner.d +++ b/dyaml/scanner.d @@ -22,6 +22,7 @@ import std.string; import std.typecons; import std.utf; +import dyaml.fastcharsearch; import dyaml.escapes; import dyaml.exception; import dyaml.queue; @@ -82,20 +83,22 @@ final class Scanner * simple key, we try to locate the corresponding ':' indicator. * Simple keys should be limited to a single line and 1024 characters. * - * 24 bytes on 64-bit. + * 16 bytes on 64-bit. */ static struct SimpleKey { ///Character index in reader where the key starts. - size_t charIndex; + uint charIndex = uint.max; ///Index of the key token from start (first token scanned being 0). uint tokenIndex; ///Line the key starts at. uint line; ///Column the key starts at. - uint column; + ushort column; ///Is this required to be a simple key? bool required; + ///Is this struct "null" (invalid)? + bool isNull; } ///Block chomping types. @@ -138,8 +141,9 @@ final class Scanner * may start at the current position. */ bool allowSimpleKey_ = true; + ///Possible simple keys indexed by flow levels. - SimpleKey[uint] possibleSimpleKeys_; + SimpleKey[] possibleSimpleKeys_; ///Used for constructing strings while limiting reallocation. 
Appender!(dchar[]) appender_; @@ -161,8 +165,8 @@ final class Scanner clear(indents_); indents_ = null; clear(possibleSimpleKeys_); - clear(appender_); possibleSimpleKeys_ = null; + clear(appender_); reader_ = null; } @@ -287,6 +291,7 @@ final class Scanner uint minTokenNumber = uint.max; foreach(k, ref simpleKey; possibleSimpleKeys_) { + if(simpleKey.isNull){continue;} minTokenNumber = min(minTokenNumber, simpleKey.tokenIndex); } return minTokenNumber; @@ -303,19 +308,18 @@ final class Scanner */ void stalePossibleSimpleKeys() { - uint[] levelsToRemove; foreach(level, ref key; possibleSimpleKeys_) { + if(key.isNull){continue;} if(key.line != reader_.line || reader_.charIndex - key.charIndex > 1024) { enforce(!key.required, new Error("While scanning a simple key", Mark(key.line, key.column), "could not find expected ':'", reader_.mark)); - levelsToRemove ~= level; + key.isNull = true; } } - foreach(level; levelsToRemove){possibleSimpleKeys_.remove(level);} } /** @@ -335,21 +339,37 @@ final class Scanner //The next token might be a simple key, so save its number and position. removePossibleSimpleKey(); uint tokenCount = tokensTaken_ + cast(uint)tokens_.length; - auto key = SimpleKey(reader_.charIndex, tokenCount, reader_.line, - reader_.column, required); + + const line = reader_.line; + const column = reader_.column; + const key = SimpleKey(cast(uint)reader_.charIndex, + tokenCount, + line, //Stored unclamped: stalePossibleSimpleKeys compares key.line with reader_.line. + column < ushort.max ? cast(ushort)column : ushort.max, + required); + + if(possibleSimpleKeys_.length <= flowLevel_) + { + const oldLength = possibleSimpleKeys_.length; + possibleSimpleKeys_.length = flowLevel_ + 1; + //No need to initialize the last element, it's already done in the next line. + possibleSimpleKeys_[oldLength .. flowLevel_] = SimpleKey.init; + } possibleSimpleKeys_[flowLevel_] = key; } ///Remove the saved possible key position at the current flow level. 
void removePossibleSimpleKey() { - if((flowLevel_ in possibleSimpleKeys_) !is null) + if(possibleSimpleKeys_.length <= flowLevel_){return;} + + if(!possibleSimpleKeys_[flowLevel_].isNull) { - auto key = possibleSimpleKeys_[flowLevel_]; + const key = possibleSimpleKeys_[flowLevel_]; enforce(!key.required, new Error("While scanning a simple key", Mark(key.line, key.column), "could not find expected ':'", reader_.mark)); - possibleSimpleKeys_.remove(flowLevel_); + possibleSimpleKeys_[flowLevel_].isNull = true; } } @@ -417,7 +437,6 @@ final class Scanner unwindIndent(-1); removePossibleSimpleKey(); allowSimpleKey_ = false; - //There's probably a saner way to clear an associated array than this. clear(possibleSimpleKeys_); tokens_.push(streamEndToken(reader_.mark, reader_.mark)); @@ -559,10 +578,11 @@ final class Scanner void fetchValue() { //Do we determine a simple key? - if(canFind(possibleSimpleKeys_.keys, flowLevel_)) + if(possibleSimpleKeys_.length > flowLevel_ && + !possibleSimpleKeys_[flowLevel_].isNull) { auto key = possibleSimpleKeys_[flowLevel_]; - possibleSimpleKeys_.remove(flowLevel_); + possibleSimpleKeys_[flowLevel_].isNull = true; Mark keyMark = Mark(key.line, key.column); auto idx = key.tokenIndex - tokensTaken_; @@ -1242,7 +1262,10 @@ final class Scanner { dchar c = reader_.peek(); uint length = 0; - while(!(" \t\0\n\r\u0085\u2028\u2029\'\"\\"d.canFind(c))) + + mixin FastCharSearch!" \t\0\n\r\u0085\u2028\u2029\'\"\\"d search; + + while(!search.canFind(c)) { ++length; c = reader_.peek(length);