Replaced possibleSimpleKeys associative array in Scanner with an
ordinary array, added a mixin to generate fast lookup tables for character search, and used said mixin in the greatest bottlenecks. Another great speedup, at least 20%, can't keep track of it anymore.
This commit is contained in:
parent
97693b4417
commit
97bdf819fa
|
@ -29,7 +29,7 @@ links = ../index.html Documentation home
|
||||||
# Source files or patterns to ignore. Supports regexp syntax.
|
# Source files or patterns to ignore. Supports regexp syntax.
|
||||||
# E.g; To ignore main.d and all source files in the test/ directory,
|
# E.g; To ignore main.d and all source files in the test/ directory,
|
||||||
# you would use: "main.d test/*"
|
# you would use: "main.d test/*"
|
||||||
ignore = test/*, examples/*, docsrc/*, autoddoc/*, yaml.d, unittest.d, cdc.d, dyaml/composer.d, dyaml/event.d, dyaml/parser.d, dyaml/reader.d, dyaml/scanner.d, dyaml/token.d, dyaml/util.d, dyaml/anchor.d, dyaml/emitter.d, dyaml/flags.d, dyaml/serializer.d, dyaml/sharedobject.d, dyaml/tag.d, dyaml/tagdirectives.d, dyaml/queue.d, dyaml/escapes.d
|
ignore = test/*, examples/*, docsrc/*, autoddoc/*, yaml.d, unittest.d, cdc.d, dyaml/composer.d, dyaml/event.d, dyaml/parser.d, dyaml/reader.d, dyaml/scanner.d, dyaml/token.d, dyaml/util.d, dyaml/anchor.d, dyaml/emitter.d, dyaml/flags.d, dyaml/serializer.d, dyaml/sharedobject.d, dyaml/tag.d, dyaml/tagdirectives.d, dyaml/queue.d, dyaml/escapes.d, dyaml/fastcharsearch.d
|
||||||
|
|
||||||
[DDOC]
|
[DDOC]
|
||||||
# Command to use to generate the documentation.
|
# Command to use to generate the documentation.
|
||||||
|
|
102
dyaml/fastcharsearch.d
Normal file
102
dyaml/fastcharsearch.d
Normal file
|
@ -0,0 +1,102 @@
|
||||||
|
|
||||||
|
// Copyright Ferdinand Majerech 2011.
|
||||||
|
// Distributed under the Boost Software License, Version 1.0.
|
||||||
|
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||||
|
// http://www.boost.org/LICENSE_1_0.txt)
|
||||||
|
|
||||||
|
module dyaml.fastcharsearch;
|
||||||
|
|
||||||
|
|
||||||
|
import std.algorithm;
|
||||||
|
import std.conv;
|
||||||
|
|
||||||
|
|
||||||
|
package:
|
||||||
|
|
||||||
|
/**
 * Template mixin for fast tests of whether a character occurs in a fixed string.
 *
 * The set of characters is known at compile time, so a lookup table
 * indexed by code point is generated from it. The table has a limited
 * size; characters of the string with code points that do not fit into
 * the table are instead checked by ordinary equality comparison.
 *
 * Params:  chars     = String to search in.
 *          tableSize = Number of entries (bytes) in the lookup table;
 *                      code points below this value are covered by the table.
 *
 * Generated method:
 *
 *     bool canFind(in dchar c) pure
 *
 *     Determines if a character is in the string.
 *
 * Example:
 * --------------------
 * mixin FastCharSearch!"\n\u0085\u2028\u2029"d search;
 * if(search.canFind(c)){...}
 * --------------------
 */
template FastCharSearch(dstring chars, uint tableSize = 256)
{
    //The code is produced at compile time by searchCode and mixed in here.
    private mixin(searchCode!(chars, tableSize)());
}
|
||||||
|
|
||||||
|
/**
 * Generate the search table and the canFind method.
 *
 * Evaluated at compile time; the result is D source code meant to be
 * mixed in (see FastCharSearch).
 *
 * Params:  chars     = Characters canFind should report as found.
 *          tableSize = Number of entries in the generated lookup table.
 *                      Characters with code points below tableSize are
 *                      looked up in the table; any others are compared
 *                      one by one. 0 disables the table entirely.
 *
 * Returns: Source code declaring the table (if tableSize is nonzero) and
 *          a function "bool canFind(in dchar c) pure".
 *
 *          The generated canFind returns false for any character not in
 *          chars, including characters outside the table's range and the
 *          case when chars is empty.
 */
string searchCode(dstring chars, uint tableSize)()
{
    const tableSizeStr = to!string(tableSize);
    //table[c] is nonzero if the character with code point c is in chars.
    ubyte[tableSize] table;
    table[] = 0;

    //Characters that don't fit in the table.
    dchar[] specialChars;

    foreach(c; chars)
    {
        if(c < tableSize){table[c] = 1;}
        else             {specialChars ~= c;}
    }

    //Code declaring the lookup table. Only used if tableSize is nonzero.
    string tableCode()
    {
        //D-style array declaration; the C-style "ubyte table_[N]" form
        //is not accepted by current compilers.
        string code = "static immutable ubyte[" ~ tableSizeStr ~ "] table_ = [\n";
        foreach(i, present; table)
        {
            code ~= present ? "true" : "false";
            //No comma after the last element.
            code ~= (i + 1 < tableSize) ? ",\n" : "\n";
        }
        code ~= "];\n\n";
        return code;
    }

    //Expression comparing c with every character that is not in the table.
    //Empty if there are no such characters.
    string specialCharsCode()
    {
        string code;
        foreach(i, c; specialChars)
        {
            if(i > 0){code ~= " || ";}
            code ~= "cast(uint)c == " ~ to!string(cast(uint)c);
        }
        return code;
    }

    string code = tableSize ? tableCode() : "";

    code ~= "bool canFind(in dchar c) pure\n" ~
            "{\n";

    if(tableSize)
    {
        //Always guard the table access: c may be any code point, even if
        //all characters we search for fit in the table.
        code ~= "    if(c < " ~ tableSizeStr ~ ")\n" ~
                "    {\n" ~
                "        return cast(bool)table_[c];\n" ~
                "    }\n";
    }

    //Anything not covered by the table is either one of the special
    //characters or not in the string at all.
    code ~= specialChars.length
            ? "    return " ~ specialCharsCode() ~ ";\n"
            : "    return false;\n";

    code ~= "}\n";

    return code;
}
|
|
@ -18,6 +18,7 @@ import std.string;
|
||||||
import std.system;
|
import std.system;
|
||||||
import std.utf;
|
import std.utf;
|
||||||
|
|
||||||
|
import dyaml.fastcharsearch;
|
||||||
import dyaml.encoding;
|
import dyaml.encoding;
|
||||||
import dyaml.exception;
|
import dyaml.exception;
|
||||||
|
|
||||||
|
@ -216,8 +217,7 @@ final class Reader
|
||||||
*/
|
*/
|
||||||
void forward(size_t length = 1)
|
void forward(size_t length = 1)
|
||||||
{
|
{
|
||||||
//This is here due to optimization.
|
mixin FastCharSearch!"\n\u0085\u2028\u2029"d search;
|
||||||
static newlines = "\n\u0085\u2028\u2029"d;
|
|
||||||
updateBuffer(length + 1);
|
updateBuffer(length + 1);
|
||||||
|
|
||||||
while(length > 0)
|
while(length > 0)
|
||||||
|
@ -226,7 +226,7 @@ final class Reader
|
||||||
++bufferOffset_;
|
++bufferOffset_;
|
||||||
++charIndex_;
|
++charIndex_;
|
||||||
//New line.
|
//New line.
|
||||||
if(newlines.canFind(c) || (c == '\r' && buffer_[bufferOffset_] != '\n'))
|
if(search.canFind(c) || (c == '\r' && buffer_[bufferOffset_] != '\n'))
|
||||||
{
|
{
|
||||||
++line_;
|
++line_;
|
||||||
column_ = 0;
|
column_ = 0;
|
||||||
|
|
|
@ -22,6 +22,7 @@ import std.string;
|
||||||
import std.typecons;
|
import std.typecons;
|
||||||
import std.utf;
|
import std.utf;
|
||||||
|
|
||||||
|
import dyaml.fastcharsearch;
|
||||||
import dyaml.escapes;
|
import dyaml.escapes;
|
||||||
import dyaml.exception;
|
import dyaml.exception;
|
||||||
import dyaml.queue;
|
import dyaml.queue;
|
||||||
|
@ -82,20 +83,22 @@ final class Scanner
|
||||||
* simple key, we try to locate the corresponding ':' indicator.
|
* simple key, we try to locate the corresponding ':' indicator.
|
||||||
* Simple keys should be limited to a single line and 1024 characters.
|
* Simple keys should be limited to a single line and 1024 characters.
|
||||||
*
|
*
|
||||||
* 24 bytes on 64-bit.
|
* 16 bytes on 64-bit.
|
||||||
*/
|
*/
|
||||||
static struct SimpleKey
|
static struct SimpleKey
|
||||||
{
|
{
|
||||||
///Character index in reader where the key starts.
|
///Character index in reader where the key starts.
|
||||||
size_t charIndex;
|
uint charIndex = uint.max;
|
||||||
///Index of the key token from start (first token scanned being 0).
|
///Index of the key token from start (first token scanned being 0).
|
||||||
uint tokenIndex;
|
uint tokenIndex;
|
||||||
///Line the key starts at.
|
///Line the key starts at.
|
||||||
uint line;
|
ushort line;
|
||||||
///Column the key starts at.
|
///Column the key starts at.
|
||||||
uint column;
|
ushort column;
|
||||||
///Is this required to be a simple key?
|
///Is this required to be a simple key?
|
||||||
bool required;
|
bool required;
|
||||||
|
///Is this struct "null" (invalid)?.
|
||||||
|
bool isNull;
|
||||||
}
|
}
|
||||||
|
|
||||||
///Block chomping types.
|
///Block chomping types.
|
||||||
|
@ -138,8 +141,9 @@ final class Scanner
|
||||||
* may start at the current position.
|
* may start at the current position.
|
||||||
*/
|
*/
|
||||||
bool allowSimpleKey_ = true;
|
bool allowSimpleKey_ = true;
|
||||||
|
|
||||||
///Possible simple keys indexed by flow levels.
|
///Possible simple keys indexed by flow levels.
|
||||||
SimpleKey[uint] possibleSimpleKeys_;
|
SimpleKey[] possibleSimpleKeys_;
|
||||||
|
|
||||||
///Used for constructing strings while limiting reallocation.
|
///Used for constructing strings while limiting reallocation.
|
||||||
Appender!(dchar[]) appender_;
|
Appender!(dchar[]) appender_;
|
||||||
|
@ -161,8 +165,8 @@ final class Scanner
|
||||||
clear(indents_);
|
clear(indents_);
|
||||||
indents_ = null;
|
indents_ = null;
|
||||||
clear(possibleSimpleKeys_);
|
clear(possibleSimpleKeys_);
|
||||||
clear(appender_);
|
|
||||||
possibleSimpleKeys_ = null;
|
possibleSimpleKeys_ = null;
|
||||||
|
clear(appender_);
|
||||||
reader_ = null;
|
reader_ = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -287,6 +291,7 @@ final class Scanner
|
||||||
uint minTokenNumber = uint.max;
|
uint minTokenNumber = uint.max;
|
||||||
foreach(k, ref simpleKey; possibleSimpleKeys_)
|
foreach(k, ref simpleKey; possibleSimpleKeys_)
|
||||||
{
|
{
|
||||||
|
if(simpleKey.isNull){continue;}
|
||||||
minTokenNumber = min(minTokenNumber, simpleKey.tokenIndex);
|
minTokenNumber = min(minTokenNumber, simpleKey.tokenIndex);
|
||||||
}
|
}
|
||||||
return minTokenNumber;
|
return minTokenNumber;
|
||||||
|
@ -303,19 +308,18 @@ final class Scanner
|
||||||
*/
|
*/
|
||||||
void stalePossibleSimpleKeys()
|
void stalePossibleSimpleKeys()
|
||||||
{
|
{
|
||||||
uint[] levelsToRemove;
|
|
||||||
foreach(level, ref key; possibleSimpleKeys_)
|
foreach(level, ref key; possibleSimpleKeys_)
|
||||||
{
|
{
|
||||||
|
if(key.isNull){continue;}
|
||||||
if(key.line != reader_.line || reader_.charIndex - key.charIndex > 1024)
|
if(key.line != reader_.line || reader_.charIndex - key.charIndex > 1024)
|
||||||
{
|
{
|
||||||
enforce(!key.required,
|
enforce(!key.required,
|
||||||
new Error("While scanning a simple key",
|
new Error("While scanning a simple key",
|
||||||
Mark(key.line, key.column),
|
Mark(key.line, key.column),
|
||||||
"could not find expected ':'", reader_.mark));
|
"could not find expected ':'", reader_.mark));
|
||||||
levelsToRemove ~= level;
|
key.isNull = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
foreach(level; levelsToRemove){possibleSimpleKeys_.remove(level);}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -335,21 +339,37 @@ final class Scanner
|
||||||
//The next token might be a simple key, so save its number and position.
|
//The next token might be a simple key, so save its number and position.
|
||||||
removePossibleSimpleKey();
|
removePossibleSimpleKey();
|
||||||
uint tokenCount = tokensTaken_ + cast(uint)tokens_.length;
|
uint tokenCount = tokensTaken_ + cast(uint)tokens_.length;
|
||||||
auto key = SimpleKey(reader_.charIndex, tokenCount, reader_.line,
|
|
||||||
reader_.column, required);
|
const line = reader_.line;
|
||||||
|
const column = reader_.column;
|
||||||
|
const key = SimpleKey(cast(uint)reader_.charIndex,
|
||||||
|
tokenCount,
|
||||||
|
line < ushort.max ? cast(ushort)line : ushort.max,
|
||||||
|
column < ushort.max ? cast(ushort)column : ushort.max,
|
||||||
|
required);
|
||||||
|
|
||||||
|
if(possibleSimpleKeys_.length <= flowLevel_)
|
||||||
|
{
|
||||||
|
const oldLength = possibleSimpleKeys_.length;
|
||||||
|
possibleSimpleKeys_.length = flowLevel_ + 1;
|
||||||
|
//No need to initialize the last element, it's already done in the next line.
|
||||||
|
possibleSimpleKeys_[oldLength .. flowLevel_] = SimpleKey.init;
|
||||||
|
}
|
||||||
possibleSimpleKeys_[flowLevel_] = key;
|
possibleSimpleKeys_[flowLevel_] = key;
|
||||||
}
|
}
|
||||||
|
|
||||||
///Remove the saved possible key position at the current flow level.
|
///Remove the saved possible key position at the current flow level.
|
||||||
void removePossibleSimpleKey()
|
void removePossibleSimpleKey()
|
||||||
{
|
{
|
||||||
if((flowLevel_ in possibleSimpleKeys_) !is null)
|
if(possibleSimpleKeys_.length <= flowLevel_){return;}
|
||||||
|
|
||||||
|
if(!possibleSimpleKeys_[flowLevel_].isNull)
|
||||||
{
|
{
|
||||||
auto key = possibleSimpleKeys_[flowLevel_];
|
const key = possibleSimpleKeys_[flowLevel_];
|
||||||
enforce(!key.required,
|
enforce(!key.required,
|
||||||
new Error("While scanning a simple key", Mark(key.line, key.column),
|
new Error("While scanning a simple key", Mark(key.line, key.column),
|
||||||
"could not find expected ':'", reader_.mark));
|
"could not find expected ':'", reader_.mark));
|
||||||
possibleSimpleKeys_.remove(flowLevel_);
|
possibleSimpleKeys_[flowLevel_].isNull = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -417,7 +437,6 @@ final class Scanner
|
||||||
unwindIndent(-1);
|
unwindIndent(-1);
|
||||||
removePossibleSimpleKey();
|
removePossibleSimpleKey();
|
||||||
allowSimpleKey_ = false;
|
allowSimpleKey_ = false;
|
||||||
//There's probably a saner way to clear an associated array than this.
|
|
||||||
clear(possibleSimpleKeys_);
|
clear(possibleSimpleKeys_);
|
||||||
|
|
||||||
tokens_.push(streamEndToken(reader_.mark, reader_.mark));
|
tokens_.push(streamEndToken(reader_.mark, reader_.mark));
|
||||||
|
@ -559,10 +578,11 @@ final class Scanner
|
||||||
void fetchValue()
|
void fetchValue()
|
||||||
{
|
{
|
||||||
//Do we determine a simple key?
|
//Do we determine a simple key?
|
||||||
if(canFind(possibleSimpleKeys_.keys, flowLevel_))
|
if(possibleSimpleKeys_.length > flowLevel_ &&
|
||||||
|
!possibleSimpleKeys_[flowLevel_].isNull)
|
||||||
{
|
{
|
||||||
auto key = possibleSimpleKeys_[flowLevel_];
|
auto key = possibleSimpleKeys_[flowLevel_];
|
||||||
possibleSimpleKeys_.remove(flowLevel_);
|
possibleSimpleKeys_[flowLevel_].isNull = true;
|
||||||
Mark keyMark = Mark(key.line, key.column);
|
Mark keyMark = Mark(key.line, key.column);
|
||||||
auto idx = key.tokenIndex - tokensTaken_;
|
auto idx = key.tokenIndex - tokensTaken_;
|
||||||
|
|
||||||
|
@ -1242,7 +1262,10 @@ final class Scanner
|
||||||
{
|
{
|
||||||
dchar c = reader_.peek();
|
dchar c = reader_.peek();
|
||||||
uint length = 0;
|
uint length = 0;
|
||||||
while(!(" \t\0\n\r\u0085\u2028\u2029\'\"\\"d.canFind(c)))
|
|
||||||
|
mixin FastCharSearch!" \t\0\n\r\u0085\u2028\u2029\'\"\\"d search;
|
||||||
|
|
||||||
|
while(!search.canFind(c))
|
||||||
{
|
{
|
||||||
++length;
|
++length;
|
||||||
c = reader_.peek(length);
|
c = reader_.peek(length);
|
||||||
|
|
Loading…
Reference in a new issue