Replaced the possibleSimpleKeys associative array in Scanner with an
ordinary array, added a mixin to generate fast lookup tables for
character search, and used said mixin in the greatest
bottlenecks. Another great speedup, at least 20%; I can't keep
track of it anymore.
This commit is contained in:
Ferdinand Majerech 2011-10-24 20:36:26 +02:00
parent 97693b4417
commit 97bdf819fa
4 changed files with 147 additions and 22 deletions

View file

@ -29,7 +29,7 @@ links = ../index.html Documentation home
# Source files or patterns to ignore. Supports regexp syntax.
# E.g., to ignore main.d and all source files in the test/ directory,
# you would use: "main.d test/*"
ignore = test/*, examples/*, docsrc/*, autoddoc/*, yaml.d, unittest.d, cdc.d, dyaml/composer.d, dyaml/event.d, dyaml/parser.d, dyaml/reader.d, dyaml/scanner.d, dyaml/token.d, dyaml/util.d, dyaml/anchor.d, dyaml/emitter.d, dyaml/flags.d, dyaml/serializer.d, dyaml/sharedobject.d, dyaml/tag.d, dyaml/tagdirectives.d, dyaml/queue.d, dyaml/escapes.d
ignore = test/*, examples/*, docsrc/*, autoddoc/*, yaml.d, unittest.d, cdc.d, dyaml/composer.d, dyaml/event.d, dyaml/parser.d, dyaml/reader.d, dyaml/scanner.d, dyaml/token.d, dyaml/util.d, dyaml/anchor.d, dyaml/emitter.d, dyaml/flags.d, dyaml/serializer.d, dyaml/sharedobject.d, dyaml/tag.d, dyaml/tagdirectives.d, dyaml/queue.d, dyaml/escapes.d, dyaml/fastcharsearch.d
[DDOC]
# Command to use to generate the documentation.

102
dyaml/fastcharsearch.d Normal file
View file

@ -0,0 +1,102 @@
// Copyright Ferdinand Majerech 2011.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
module dyaml.fastcharsearch;
import std.algorithm;
import std.conv;
package:
/**
 * Mixin used for fast searching for a character in a string.
 *
 * Mixes in the code generated by searchCode: a lookup table indexed by
 * character code, plus a canFind method that consults it. The size of
 * the lookup table is limited; any characters of the string that do not
 * fit in the table are checked by ordinary equality comparison instead.
 *
 * Intended to be mixed in under a name and queried through that name, e.g.
 * --------------------
 * mixin FastCharSearch!"\n\u0085\u2028\u2029"d search;
 * if(search.canFind(c)){...}
 * --------------------
 *
 * Params:  chars     = String to search in.
 *          tableSize = Maximum number of bytes used by the table
 *                      (one byte per character code; 256 by default).
 *
 * Generated method:
 *     bool canFind(in dchar c) pure
 *
 *     Determines if a character is in the string.
 */
template FastCharSearch(dstring chars, uint tableSize = 256)
{
private mixin(searchCode!(chars, tableSize)());
}
/**
 * Generate the search table and the canFind method as D source code.
 *
 * Evaluated at compile time by FastCharSearch, which mixes the result in.
 *
 * Params:  chars     = Characters that the generated canFind must report as found.
 *          tableSize = Number of one-byte entries in the lookup table. Characters
 *                      with a code below tableSize are looked up in the table;
 *                      all others are compared one by one. 0 disables the table.
 *
 * Returns: Source code declaring table_ (only if tableSize is nonzero) and
 *          $(D bool canFind(in dchar c) pure).
 */
string searchCode(dstring chars, uint tableSize)()
{
    const tableSizeStr = to!string(tableSize);
    ubyte[tableSize] table;
    table[] = 0;

    //Characters that don't fit in the table.
    dchar[] specialChars;

    foreach(c; chars)
    {
        if(c < tableSize){table[c] = 1;}
        else             {specialChars ~= c;}
    }

    //Source of the lookup table; only called when tableSize is nonzero.
    string tableCode()
    {
        string code = "static immutable ubyte[" ~ tableSizeStr ~ "] table_ = [\n";
        foreach(c; table[0 .. $ - 1])
        {
            code ~= c ? "true,\n" : "false,\n";
        }
        code ~= table[$ - 1] ? "true\n" : "false\n";
        code ~= "];\n\n";
        return code;
    }

    //Source of an "a || b || ..." comparison chain over the characters outside the table.
    string specialCharsCode()
    {
        string code;
        foreach(i, c; specialChars)
        {
            if(i > 0){code ~= " || ";}
            code ~= "cast(uint)c == " ~ to!string(cast(uint)c);
        }
        return code;
    }

    string code = tableSize ? tableCode() : "";

    code ~= "bool canFind(in dchar c) pure\n" ~
            "{\n";

    if(tableSize)
    {
        //The argument can be any code point, so the table lookup must always
        //be guarded - even when every searched character fits in the table.
        code ~= "    if(c < " ~ tableSizeStr ~ ")\n" ~
                "    {\n" ~
                "        return cast(bool)table_[c];\n" ~
                "    }\n";
    }

    //Anything past the table is either one of the special characters or not found.
    code ~= specialChars.length
            ? "    return " ~ specialCharsCode() ~ ";\n"
            : "    return false;\n";
    code ~= "}\n";

    return code;
}

View file

@ -18,6 +18,7 @@ import std.string;
import std.system;
import std.utf;
import dyaml.fastcharsearch;
import dyaml.encoding;
import dyaml.exception;
@ -216,8 +217,7 @@ final class Reader
*/
void forward(size_t length = 1)
{
//This is here due to optimization.
static newlines = "\n\u0085\u2028\u2029"d;
mixin FastCharSearch!"\n\u0085\u2028\u2029"d search;
updateBuffer(length + 1);
while(length > 0)
@ -226,7 +226,7 @@ final class Reader
++bufferOffset_;
++charIndex_;
//New line.
if(newlines.canFind(c) || (c == '\r' && buffer_[bufferOffset_] != '\n'))
if(search.canFind(c) || (c == '\r' && buffer_[bufferOffset_] != '\n'))
{
++line_;
column_ = 0;

View file

@ -22,6 +22,7 @@ import std.string;
import std.typecons;
import std.utf;
import dyaml.fastcharsearch;
import dyaml.escapes;
import dyaml.exception;
import dyaml.queue;
@ -82,20 +83,22 @@ final class Scanner
* simple key, we try to locate the corresponding ':' indicator.
* Simple keys should be limited to a single line and 1024 characters.
*
* 24 bytes on 64-bit.
* 16 bytes on 64-bit.
*/
static struct SimpleKey
{
///Character index in reader where the key starts.
size_t charIndex;
uint charIndex = uint.max;
///Index of the key token from start (first token scanned being 0).
uint tokenIndex;
///Line the key starts at.
uint line;
ushort line;
///Column the key starts at.
uint column;
ushort column;
///Is this required to be a simple key?
bool required;
///Is this struct "null" (invalid)?.
bool isNull;
}
///Block chomping types.
@ -138,8 +141,9 @@ final class Scanner
* may start at the current position.
*/
bool allowSimpleKey_ = true;
///Possible simple keys indexed by flow levels.
SimpleKey[uint] possibleSimpleKeys_;
SimpleKey[] possibleSimpleKeys_;
///Used for constructing strings while limiting reallocation.
Appender!(dchar[]) appender_;
@ -161,8 +165,8 @@ final class Scanner
clear(indents_);
indents_ = null;
clear(possibleSimpleKeys_);
clear(appender_);
possibleSimpleKeys_ = null;
clear(appender_);
reader_ = null;
}
@ -287,6 +291,7 @@ final class Scanner
uint minTokenNumber = uint.max;
foreach(k, ref simpleKey; possibleSimpleKeys_)
{
if(simpleKey.isNull){continue;}
minTokenNumber = min(minTokenNumber, simpleKey.tokenIndex);
}
return minTokenNumber;
@ -303,19 +308,18 @@ final class Scanner
*/
void stalePossibleSimpleKeys()
{
uint[] levelsToRemove;
foreach(level, ref key; possibleSimpleKeys_)
{
if(key.isNull){continue;}
if(key.line != reader_.line || reader_.charIndex - key.charIndex > 1024)
{
enforce(!key.required,
new Error("While scanning a simple key",
Mark(key.line, key.column),
"could not find expected ':'", reader_.mark));
levelsToRemove ~= level;
key.isNull = true;
}
}
foreach(level; levelsToRemove){possibleSimpleKeys_.remove(level);}
}
/**
@ -335,21 +339,37 @@ final class Scanner
//The next token might be a simple key, so save its number and position.
removePossibleSimpleKey();
uint tokenCount = tokensTaken_ + cast(uint)tokens_.length;
auto key = SimpleKey(reader_.charIndex, tokenCount, reader_.line,
reader_.column, required);
const line = reader_.line;
const column = reader_.column;
const key = SimpleKey(cast(uint)reader_.charIndex,
tokenCount,
line < ushort.max ? cast(ushort)line : ushort.max,
column < ushort.max ? cast(ushort)column : ushort.max,
required);
if(possibleSimpleKeys_.length <= flowLevel_)
{
const oldLength = possibleSimpleKeys_.length;
possibleSimpleKeys_.length = flowLevel_ + 1;
//No need to initialize the last element, it's already done in the next line.
possibleSimpleKeys_[oldLength .. flowLevel_] = SimpleKey.init;
}
possibleSimpleKeys_[flowLevel_] = key;
}
///Remove the saved possible key position at the current flow level.
void removePossibleSimpleKey()
{
if((flowLevel_ in possibleSimpleKeys_) !is null)
if(possibleSimpleKeys_.length <= flowLevel_){return;}
if(!possibleSimpleKeys_[flowLevel_].isNull)
{
auto key = possibleSimpleKeys_[flowLevel_];
const key = possibleSimpleKeys_[flowLevel_];
enforce(!key.required,
new Error("While scanning a simple key", Mark(key.line, key.column),
"could not find expected ':'", reader_.mark));
possibleSimpleKeys_.remove(flowLevel_);
possibleSimpleKeys_[flowLevel_].isNull = true;
}
}
@ -417,7 +437,6 @@ final class Scanner
unwindIndent(-1);
removePossibleSimpleKey();
allowSimpleKey_ = false;
//There's probably a saner way to clear an associated array than this.
clear(possibleSimpleKeys_);
tokens_.push(streamEndToken(reader_.mark, reader_.mark));
@ -559,10 +578,11 @@ final class Scanner
void fetchValue()
{
//Do we determine a simple key?
if(canFind(possibleSimpleKeys_.keys, flowLevel_))
if(possibleSimpleKeys_.length > flowLevel_ &&
!possibleSimpleKeys_[flowLevel_].isNull)
{
auto key = possibleSimpleKeys_[flowLevel_];
possibleSimpleKeys_.remove(flowLevel_);
possibleSimpleKeys_[flowLevel_].isNull = true;
Mark keyMark = Mark(key.line, key.column);
auto idx = key.tokenIndex - tokensTaken_;
@ -1242,7 +1262,10 @@ final class Scanner
{
dchar c = reader_.peek();
uint length = 0;
while(!(" \t\0\n\r\u0085\u2028\u2029\'\"\\"d.canFind(c)))
mixin FastCharSearch!" \t\0\n\r\u0085\u2028\u2029\'\"\\"d search;
while(!search.canFind(c))
{
++length;
c = reader_.peek(length);