Replaced the possibleSimpleKeys associative array in Scanner with an
ordinary array, added a mixin to generate fast lookup tables for character search, and used said mixin in the greatest bottlenecks. Another great speedup of at least 20%; I can't keep track of the exact figure anymore.
This commit is contained in:
parent
97693b4417
commit
97bdf819fa
|
@ -29,7 +29,7 @@ links = ../index.html Documentation home
|
|||
# Source files or patterns to ignore. Supports regexp syntax.
|
||||
# E.g; To ignore main.d and all source files in the test/ directory,
|
||||
# you would use: "main.d test/*"
|
||||
ignore = test/*, examples/*, docsrc/*, autoddoc/*, yaml.d, unittest.d, cdc.d, dyaml/composer.d, dyaml/event.d, dyaml/parser.d, dyaml/reader.d, dyaml/scanner.d, dyaml/token.d, dyaml/util.d, dyaml/anchor.d, dyaml/emitter.d, dyaml/flags.d, dyaml/serializer.d, dyaml/sharedobject.d, dyaml/tag.d, dyaml/tagdirectives.d, dyaml/queue.d, dyaml/escapes.d
|
||||
ignore = test/*, examples/*, docsrc/*, autoddoc/*, yaml.d, unittest.d, cdc.d, dyaml/composer.d, dyaml/event.d, dyaml/parser.d, dyaml/reader.d, dyaml/scanner.d, dyaml/token.d, dyaml/util.d, dyaml/anchor.d, dyaml/emitter.d, dyaml/flags.d, dyaml/serializer.d, dyaml/sharedobject.d, dyaml/tag.d, dyaml/tagdirectives.d, dyaml/queue.d, dyaml/escapes.d, dyaml/fastcharsearch.d
|
||||
|
||||
[DDOC]
|
||||
# Command to use to generate the documentation.
|
||||
|
|
102
dyaml/fastcharsearch.d
Normal file
102
dyaml/fastcharsearch.d
Normal file
|
@ -0,0 +1,102 @@
|
|||
|
||||
// Copyright Ferdinand Majerech 2011.
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
module dyaml.fastcharsearch;
|
||||
|
||||
|
||||
import std.algorithm;
|
||||
import std.conv;
|
||||
|
||||
|
||||
package:
|
||||
|
||||
/**
 * Template mixin used for fast searching for a character in a string
 * that is known at compile time.
 *
 * Mixes in the code produced by searchCode: a lookup table holding one
 * ubyte per code point below tableSize, plus a canFind method.
 * Characters of chars whose code point is tableSize or greater do not
 * fit in the table and are checked by ordinary equality comparison.
 *
 * Typical use is to mix the template in under a name and call the
 * generated method through that name, e.g.
 * ---
 * mixin FastCharSearch!"\n\u0085\u2028\u2029"d search;
 * if(search.canFind(c)){...}
 * ---
 *
 * Params:  chars     = String to search in.
 *          tableSize = Number of entries (one byte each) in the lookup
 *                      table; defaults to 256.
 *
 * Generated method:
 *     bool canFind(in dchar c) pure
 *
 *     Determines if a character is in the string.
 */
|
||||
template FastCharSearch(dstring chars, uint tableSize = 256)
|
||||
{
|
||||
private mixin(searchCode!(chars, tableSize)());
|
||||
}
|
||||
|
||||
/**
 * Generate the search table and the canFind method.
 *
 * Builds, as a string of D source meant to be mixed in, a static lookup
 * table named table_ (omitted when tableSize is 0) followed by
 *
 *     bool canFind(in dchar c) pure
 *
 * The generated canFind first consults the table when c is below
 * tableSize, and otherwise compares c against every character of chars
 * that did not fit in the table. If no such characters exist, anything
 * outside the table is reported as not found.
 *
 * Params:  chars     = Characters canFind should recognize.
 *          tableSize = Number of one-byte entries in the lookup table.
 *                      Characters with code point >= tableSize are
 *                      handled by equality comparison instead.
 *
 * Returns: D source code declaring the table and the canFind method.
 */
string searchCode(dstring chars, uint tableSize)()
{
    const tableSizeStr = to!string(tableSize);
    ubyte[tableSize] table;
    table[] = 0;

    //Characters that don't fit in the table.
    dchar[] specialChars;

    foreach(c; chars)
    {
        if(c < tableSize){table[c] = 1;}
        else             {specialChars ~= c;}
    }

    //Source of the lookup table declaration, one entry per line.
    string tableCode()
    {
        string code = "static immutable ubyte[" ~ tableSizeStr ~ "] table_ = [\n";
        foreach(idx, present; table)
        {
            code ~= present ? "true" : "false";
            //No trailing comma after the last entry.
            code ~= (idx + 1 < table.length) ? ",\n" : "\n";
        }
        code ~= "];\n\n";
        return code;
    }

    //Source of the expression testing c against the characters outside the table.
    string specialCharsCode()
    {
        //Nothing outside the table can match if there are no special characters.
        if(specialChars.length == 0){return "false";}

        string code;
        foreach(idx, c; specialChars)
        {
            if(idx > 0){code ~= " || ";}
            code ~= "cast(uint)c == " ~ to!string(cast(uint)c);
        }
        return code;
    }

    string code = tableSize ? tableCode() : "";

    code ~= "bool canFind(in dchar c) pure\n" ~
            "{\n";

    if(tableSize)
    {
        //Always guard the table lookup: c may be any code point, including
        //ones past the end of the table, even if all of chars fit in it.
        code ~= "    if(c < " ~ tableSizeStr ~ ")\n" ~
                "    {\n" ~
                "        return cast(bool)table_[c];\n" ~
                "    }\n";
    }

    code ~= "    return " ~ specialCharsCode() ~ ";\n" ~
            "}\n";

    return code;
}
|
|
@ -18,6 +18,7 @@ import std.string;
|
|||
import std.system;
|
||||
import std.utf;
|
||||
|
||||
import dyaml.fastcharsearch;
|
||||
import dyaml.encoding;
|
||||
import dyaml.exception;
|
||||
|
||||
|
@ -216,8 +217,7 @@ final class Reader
|
|||
*/
|
||||
void forward(size_t length = 1)
|
||||
{
|
||||
//This is here due to optimization.
|
||||
static newlines = "\n\u0085\u2028\u2029"d;
|
||||
mixin FastCharSearch!"\n\u0085\u2028\u2029"d search;
|
||||
updateBuffer(length + 1);
|
||||
|
||||
while(length > 0)
|
||||
|
@ -226,7 +226,7 @@ final class Reader
|
|||
++bufferOffset_;
|
||||
++charIndex_;
|
||||
//New line.
|
||||
if(newlines.canFind(c) || (c == '\r' && buffer_[bufferOffset_] != '\n'))
|
||||
if(search.canFind(c) || (c == '\r' && buffer_[bufferOffset_] != '\n'))
|
||||
{
|
||||
++line_;
|
||||
column_ = 0;
|
||||
|
|
|
@ -22,6 +22,7 @@ import std.string;
|
|||
import std.typecons;
|
||||
import std.utf;
|
||||
|
||||
import dyaml.fastcharsearch;
|
||||
import dyaml.escapes;
|
||||
import dyaml.exception;
|
||||
import dyaml.queue;
|
||||
|
@ -82,20 +83,22 @@ final class Scanner
|
|||
* simple key, we try to locate the corresponding ':' indicator.
|
||||
* Simple keys should be limited to a single line and 1024 characters.
|
||||
*
|
||||
* 24 bytes on 64-bit.
|
||||
* 16 bytes on 64-bit.
|
||||
*/
|
||||
static struct SimpleKey
|
||||
{
|
||||
///Character index in reader where the key starts.
|
||||
size_t charIndex;
|
||||
uint charIndex = uint.max;
|
||||
///Index of the key token from start (first token scanned being 0).
|
||||
uint tokenIndex;
|
||||
///Line the key starts at.
|
||||
uint line;
|
||||
ushort line;
|
||||
///Column the key starts at.
|
||||
uint column;
|
||||
ushort column;
|
||||
///Is this required to be a simple key?
|
||||
bool required;
|
||||
///Is this struct "null" (invalid)?.
|
||||
bool isNull;
|
||||
}
|
||||
|
||||
///Block chomping types.
|
||||
|
@ -138,8 +141,9 @@ final class Scanner
|
|||
* may start at the current position.
|
||||
*/
|
||||
bool allowSimpleKey_ = true;
|
||||
|
||||
///Possible simple keys indexed by flow levels.
|
||||
SimpleKey[uint] possibleSimpleKeys_;
|
||||
SimpleKey[] possibleSimpleKeys_;
|
||||
|
||||
///Used for constructing strings while limiting reallocation.
|
||||
Appender!(dchar[]) appender_;
|
||||
|
@ -161,8 +165,8 @@ final class Scanner
|
|||
clear(indents_);
|
||||
indents_ = null;
|
||||
clear(possibleSimpleKeys_);
|
||||
clear(appender_);
|
||||
possibleSimpleKeys_ = null;
|
||||
clear(appender_);
|
||||
reader_ = null;
|
||||
}
|
||||
|
||||
|
@ -287,6 +291,7 @@ final class Scanner
|
|||
uint minTokenNumber = uint.max;
|
||||
foreach(k, ref simpleKey; possibleSimpleKeys_)
|
||||
{
|
||||
if(simpleKey.isNull){continue;}
|
||||
minTokenNumber = min(minTokenNumber, simpleKey.tokenIndex);
|
||||
}
|
||||
return minTokenNumber;
|
||||
|
@ -303,19 +308,18 @@ final class Scanner
|
|||
*/
|
||||
void stalePossibleSimpleKeys()
|
||||
{
|
||||
uint[] levelsToRemove;
|
||||
foreach(level, ref key; possibleSimpleKeys_)
|
||||
{
|
||||
if(key.isNull){continue;}
|
||||
if(key.line != reader_.line || reader_.charIndex - key.charIndex > 1024)
|
||||
{
|
||||
enforce(!key.required,
|
||||
new Error("While scanning a simple key",
|
||||
Mark(key.line, key.column),
|
||||
"could not find expected ':'", reader_.mark));
|
||||
levelsToRemove ~= level;
|
||||
key.isNull = true;
|
||||
}
|
||||
}
|
||||
foreach(level; levelsToRemove){possibleSimpleKeys_.remove(level);}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -335,21 +339,37 @@ final class Scanner
|
|||
//The next token might be a simple key, so save its number and position.
|
||||
removePossibleSimpleKey();
|
||||
uint tokenCount = tokensTaken_ + cast(uint)tokens_.length;
|
||||
auto key = SimpleKey(reader_.charIndex, tokenCount, reader_.line,
|
||||
reader_.column, required);
|
||||
|
||||
const line = reader_.line;
|
||||
const column = reader_.column;
|
||||
const key = SimpleKey(cast(uint)reader_.charIndex,
|
||||
tokenCount,
|
||||
line < ushort.max ? cast(ushort)line : ushort.max,
|
||||
column < ushort.max ? cast(ushort)column : ushort.max,
|
||||
required);
|
||||
|
||||
if(possibleSimpleKeys_.length <= flowLevel_)
|
||||
{
|
||||
const oldLength = possibleSimpleKeys_.length;
|
||||
possibleSimpleKeys_.length = flowLevel_ + 1;
|
||||
//No need to initialize the last element, it's already done in the next line.
|
||||
possibleSimpleKeys_[oldLength .. flowLevel_] = SimpleKey.init;
|
||||
}
|
||||
possibleSimpleKeys_[flowLevel_] = key;
|
||||
}
|
||||
|
||||
///Remove the saved possible key position at the current flow level.
|
||||
void removePossibleSimpleKey()
|
||||
{
|
||||
if((flowLevel_ in possibleSimpleKeys_) !is null)
|
||||
if(possibleSimpleKeys_.length <= flowLevel_){return;}
|
||||
|
||||
if(!possibleSimpleKeys_[flowLevel_].isNull)
|
||||
{
|
||||
auto key = possibleSimpleKeys_[flowLevel_];
|
||||
const key = possibleSimpleKeys_[flowLevel_];
|
||||
enforce(!key.required,
|
||||
new Error("While scanning a simple key", Mark(key.line, key.column),
|
||||
"could not find expected ':'", reader_.mark));
|
||||
possibleSimpleKeys_.remove(flowLevel_);
|
||||
possibleSimpleKeys_[flowLevel_].isNull = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -417,7 +437,6 @@ final class Scanner
|
|||
unwindIndent(-1);
|
||||
removePossibleSimpleKey();
|
||||
allowSimpleKey_ = false;
|
||||
//There's probably a saner way to clear an associated array than this.
|
||||
clear(possibleSimpleKeys_);
|
||||
|
||||
tokens_.push(streamEndToken(reader_.mark, reader_.mark));
|
||||
|
@ -559,10 +578,11 @@ final class Scanner
|
|||
void fetchValue()
|
||||
{
|
||||
//Do we determine a simple key?
|
||||
if(canFind(possibleSimpleKeys_.keys, flowLevel_))
|
||||
if(possibleSimpleKeys_.length > flowLevel_ &&
|
||||
!possibleSimpleKeys_[flowLevel_].isNull)
|
||||
{
|
||||
auto key = possibleSimpleKeys_[flowLevel_];
|
||||
possibleSimpleKeys_.remove(flowLevel_);
|
||||
possibleSimpleKeys_[flowLevel_].isNull = true;
|
||||
Mark keyMark = Mark(key.line, key.column);
|
||||
auto idx = key.tokenIndex - tokensTaken_;
|
||||
|
||||
|
@ -1242,7 +1262,10 @@ final class Scanner
|
|||
{
|
||||
dchar c = reader_.peek();
|
||||
uint length = 0;
|
||||
while(!(" \t\0\n\r\u0085\u2028\u2029\'\"\\"d.canFind(c)))
|
||||
|
||||
mixin FastCharSearch!" \t\0\n\r\u0085\u2028\u2029\'\"\\"d search;
|
||||
|
||||
while(!search.canFind(c))
|
||||
{
|
||||
++length;
|
||||
c = reader_.peek(length);
|
||||
|
|
Loading…
Reference in a new issue