Replaced possibleSimpleKeys associative array in Scanner with an

ordinary array, added a mixin to generate fast lookup tables for
character search, and used said mixin in the greatest
bottlenecks. Another great speedup of at least 20%; I can't keep
track of the total anymore.
This commit is contained in:
Ferdinand Majerech 2011-10-24 20:36:26 +02:00
parent 97693b4417
commit 97bdf819fa
4 changed files with 147 additions and 22 deletions

View file

@ -29,7 +29,7 @@ links = ../index.html Documentation home
# Source files or patterns to ignore. Supports regexp syntax. # Source files or patterns to ignore. Supports regexp syntax.
# E.g; To ignore main.d and all source files in the test/ directory, # E.g; To ignore main.d and all source files in the test/ directory,
# you would use: "main.d test/*" # you would use: "main.d test/*"
ignore = test/*, examples/*, docsrc/*, autoddoc/*, yaml.d, unittest.d, cdc.d, dyaml/composer.d, dyaml/event.d, dyaml/parser.d, dyaml/reader.d, dyaml/scanner.d, dyaml/token.d, dyaml/util.d, dyaml/anchor.d, dyaml/emitter.d, dyaml/flags.d, dyaml/serializer.d, dyaml/sharedobject.d, dyaml/tag.d, dyaml/tagdirectives.d, dyaml/queue.d, dyaml/escapes.d ignore = test/*, examples/*, docsrc/*, autoddoc/*, yaml.d, unittest.d, cdc.d, dyaml/composer.d, dyaml/event.d, dyaml/parser.d, dyaml/reader.d, dyaml/scanner.d, dyaml/token.d, dyaml/util.d, dyaml/anchor.d, dyaml/emitter.d, dyaml/flags.d, dyaml/serializer.d, dyaml/sharedobject.d, dyaml/tag.d, dyaml/tagdirectives.d, dyaml/queue.d, dyaml/escapes.d, dyaml/fastcharsearch.d
[DDOC] [DDOC]
# Command to use to generate the documentation. # Command to use to generate the documentation.

102
dyaml/fastcharsearch.d Normal file
View file

@ -0,0 +1,102 @@
// Copyright Ferdinand Majerech 2011.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
module dyaml.fastcharsearch;
import std.algorithm;
import std.conv;
package:
/**
 * Mixin used for fast searching for a character in a string.
 *
 * Creates a lookup table to quickly determine if a character
 * is present in the string. The table has tableSize one-byte entries
 * indexed by code point, so only characters of chars with a code point
 * below tableSize are represented in it; any characters of chars not
 * represented in the table will be checked by ordinary equality comparison.
 *
 * The code is produced at compile time by searchCode and mixed in as
 * private declarations of the scope that mixes this template in.
 *
 * Params:  chars     = String to search in.
 *          tableSize = Maximum number of bytes used by the table
 *                      (one byte per code point in 0 .. tableSize).
 *
 * Generated method:
 *     bool canFind(dchar c)
 *
 *     Determines if a character is in the string.
 *
 * Example:
 * --------------------
 * mixin FastCharSearch!"\n\u0085\u2028\u2029"d search;
 * if(search.canFind(c)){/+ c is one of the listed characters +/}
 * --------------------
 */
template FastCharSearch(dstring chars, uint tableSize = 256)
{
//Generate the table and canFind() at compile time and inject them here.
private mixin(searchCode!(chars, tableSize)());
}
/**
 * Generate the search table and the canFind method.
 *
 * Intended to be evaluated at compile time and passed to mixin().
 *
 * The generated code consists of:
 *   - table_ (only if tableSize is nonzero): a static immutable lookup table
 *     with one entry per code point in 0 .. tableSize; an entry is nonzero
 *     if that code point occurs in chars.
 *   - bool canFind(in dchar c) pure: looks c up in table_ if c is below
 *     tableSize, otherwise compares c with each character of chars that
 *     did not fit into the table. Returns false if nothing matches.
 *
 * Params:  chars     = Characters the generated canFind should recognize.
 *          tableSize = Number of entries (bytes) in the lookup table.
 *                      0 disables the table; all characters are then compared
 *                      one by one.
 *
 * Returns: D source code of the table and the canFind function.
 */
string searchCode(dstring chars, uint tableSize)()
{
    const tableSizeStr = to!string(tableSize);
    ubyte[tableSize] table;
    table[] = 0;

    //Characters that don't fit in the table.
    dchar[] specialChars;

    foreach(c; chars)
    {
        if(c < tableSize){table[c] = 1;}
        else             {specialChars ~= c;}
    }

    //Source code of the lookup table declaration.
    string tableCode()
    {
        //D-style array declaration; the C-style form is rejected by current compilers.
        string code = "static immutable ubyte[" ~ tableSizeStr ~ "] table_ = [\n";
        foreach(i, present; table)
        {
            code ~= present ? "true" : "false";
            //No separator after the last element.
            code ~= (i + 1 < table.length) ? ",\n" : "\n";
        }
        code ~= "];\n\n";
        return code;
    }

    //Source code of an expression comparing c with every character outside the table.
    string specialCharsCode()
    {
        string code;
        foreach(i, c; specialChars)
        {
            if(i > 0){code ~= " || ";}
            code ~= "cast(uint)c == " ~ to!string(cast(uint)c);
        }
        return code;
    }

    string code = tableSize ? tableCode() : "";

    code ~= "bool canFind(in dchar c) pure\n" ~
            "{\n";

    if(tableSize)
    {
        //Always guard the table access: c may be any code point, not just
        //one that fits in the table, even if all searched characters do.
        code ~= "    if(c < " ~ tableSizeStr ~ ")\n" ~
                "    {\n" ~
                "        return cast(bool)table_[c];\n" ~
                "    }\n";
    }

    //Anything outside the table is either one of the special characters or not found.
    code ~= specialChars.length
            ? "    return " ~ specialCharsCode() ~ ";\n"
            : "    return false;\n";
    code ~= "}\n";

    return code;
}

View file

@ -18,6 +18,7 @@ import std.string;
import std.system; import std.system;
import std.utf; import std.utf;
import dyaml.fastcharsearch;
import dyaml.encoding; import dyaml.encoding;
import dyaml.exception; import dyaml.exception;
@ -216,8 +217,7 @@ final class Reader
*/ */
void forward(size_t length = 1) void forward(size_t length = 1)
{ {
//This is here due to optimization. mixin FastCharSearch!"\n\u0085\u2028\u2029"d search;
static newlines = "\n\u0085\u2028\u2029"d;
updateBuffer(length + 1); updateBuffer(length + 1);
while(length > 0) while(length > 0)
@ -226,7 +226,7 @@ final class Reader
++bufferOffset_; ++bufferOffset_;
++charIndex_; ++charIndex_;
//New line. //New line.
if(newlines.canFind(c) || (c == '\r' && buffer_[bufferOffset_] != '\n')) if(search.canFind(c) || (c == '\r' && buffer_[bufferOffset_] != '\n'))
{ {
++line_; ++line_;
column_ = 0; column_ = 0;

View file

@ -22,6 +22,7 @@ import std.string;
import std.typecons; import std.typecons;
import std.utf; import std.utf;
import dyaml.fastcharsearch;
import dyaml.escapes; import dyaml.escapes;
import dyaml.exception; import dyaml.exception;
import dyaml.queue; import dyaml.queue;
@ -82,20 +83,22 @@ final class Scanner
* simple key, we try to locate the corresponding ':' indicator. * simple key, we try to locate the corresponding ':' indicator.
* Simple keys should be limited to a single line and 1024 characters. * Simple keys should be limited to a single line and 1024 characters.
* *
* 24 bytes on 64-bit. * 16 bytes on 64-bit.
*/ */
static struct SimpleKey static struct SimpleKey
{ {
///Character index in reader where the key starts. ///Character index in reader where the key starts.
size_t charIndex; uint charIndex = uint.max;
///Index of the key token from start (first token scanned being 0). ///Index of the key token from start (first token scanned being 0).
uint tokenIndex; uint tokenIndex;
///Line the key starts at. ///Line the key starts at.
uint line; ushort line;
///Column the key starts at. ///Column the key starts at.
uint column; ushort column;
///Is this required to be a simple key? ///Is this required to be a simple key?
bool required; bool required;
///Is this struct "null" (invalid)?.
bool isNull;
} }
///Block chomping types. ///Block chomping types.
@ -138,8 +141,9 @@ final class Scanner
* may start at the current position. * may start at the current position.
*/ */
bool allowSimpleKey_ = true; bool allowSimpleKey_ = true;
///Possible simple keys indexed by flow levels. ///Possible simple keys indexed by flow levels.
SimpleKey[uint] possibleSimpleKeys_; SimpleKey[] possibleSimpleKeys_;
///Used for constructing strings while limiting reallocation. ///Used for constructing strings while limiting reallocation.
Appender!(dchar[]) appender_; Appender!(dchar[]) appender_;
@ -161,8 +165,8 @@ final class Scanner
clear(indents_); clear(indents_);
indents_ = null; indents_ = null;
clear(possibleSimpleKeys_); clear(possibleSimpleKeys_);
clear(appender_);
possibleSimpleKeys_ = null; possibleSimpleKeys_ = null;
clear(appender_);
reader_ = null; reader_ = null;
} }
@ -287,6 +291,7 @@ final class Scanner
uint minTokenNumber = uint.max; uint minTokenNumber = uint.max;
foreach(k, ref simpleKey; possibleSimpleKeys_) foreach(k, ref simpleKey; possibleSimpleKeys_)
{ {
if(simpleKey.isNull){continue;}
minTokenNumber = min(minTokenNumber, simpleKey.tokenIndex); minTokenNumber = min(minTokenNumber, simpleKey.tokenIndex);
} }
return minTokenNumber; return minTokenNumber;
@ -303,19 +308,18 @@ final class Scanner
*/ */
void stalePossibleSimpleKeys() void stalePossibleSimpleKeys()
{ {
uint[] levelsToRemove;
foreach(level, ref key; possibleSimpleKeys_) foreach(level, ref key; possibleSimpleKeys_)
{ {
if(key.isNull){continue;}
if(key.line != reader_.line || reader_.charIndex - key.charIndex > 1024) if(key.line != reader_.line || reader_.charIndex - key.charIndex > 1024)
{ {
enforce(!key.required, enforce(!key.required,
new Error("While scanning a simple key", new Error("While scanning a simple key",
Mark(key.line, key.column), Mark(key.line, key.column),
"could not find expected ':'", reader_.mark)); "could not find expected ':'", reader_.mark));
levelsToRemove ~= level; key.isNull = true;
} }
} }
foreach(level; levelsToRemove){possibleSimpleKeys_.remove(level);}
} }
/** /**
@ -335,21 +339,37 @@ final class Scanner
//The next token might be a simple key, so save its number and position. //The next token might be a simple key, so save its number and position.
removePossibleSimpleKey(); removePossibleSimpleKey();
uint tokenCount = tokensTaken_ + cast(uint)tokens_.length; uint tokenCount = tokensTaken_ + cast(uint)tokens_.length;
auto key = SimpleKey(reader_.charIndex, tokenCount, reader_.line,
reader_.column, required); const line = reader_.line;
const column = reader_.column;
const key = SimpleKey(cast(uint)reader_.charIndex,
tokenCount,
line < ushort.max ? cast(ushort)line : ushort.max,
column < ushort.max ? cast(ushort)column : ushort.max,
required);
if(possibleSimpleKeys_.length <= flowLevel_)
{
const oldLength = possibleSimpleKeys_.length;
possibleSimpleKeys_.length = flowLevel_ + 1;
//No need to initialize the last element, it's already done in the next line.
possibleSimpleKeys_[oldLength .. flowLevel_] = SimpleKey.init;
}
possibleSimpleKeys_[flowLevel_] = key; possibleSimpleKeys_[flowLevel_] = key;
} }
///Remove the saved possible key position at the current flow level. ///Remove the saved possible key position at the current flow level.
void removePossibleSimpleKey() void removePossibleSimpleKey()
{ {
if((flowLevel_ in possibleSimpleKeys_) !is null) if(possibleSimpleKeys_.length <= flowLevel_){return;}
if(!possibleSimpleKeys_[flowLevel_].isNull)
{ {
auto key = possibleSimpleKeys_[flowLevel_]; const key = possibleSimpleKeys_[flowLevel_];
enforce(!key.required, enforce(!key.required,
new Error("While scanning a simple key", Mark(key.line, key.column), new Error("While scanning a simple key", Mark(key.line, key.column),
"could not find expected ':'", reader_.mark)); "could not find expected ':'", reader_.mark));
possibleSimpleKeys_.remove(flowLevel_); possibleSimpleKeys_[flowLevel_].isNull = true;
} }
} }
@ -417,7 +437,6 @@ final class Scanner
unwindIndent(-1); unwindIndent(-1);
removePossibleSimpleKey(); removePossibleSimpleKey();
allowSimpleKey_ = false; allowSimpleKey_ = false;
//There's probably a saner way to clear an associated array than this.
clear(possibleSimpleKeys_); clear(possibleSimpleKeys_);
tokens_.push(streamEndToken(reader_.mark, reader_.mark)); tokens_.push(streamEndToken(reader_.mark, reader_.mark));
@ -559,10 +578,11 @@ final class Scanner
void fetchValue() void fetchValue()
{ {
//Do we determine a simple key? //Do we determine a simple key?
if(canFind(possibleSimpleKeys_.keys, flowLevel_)) if(possibleSimpleKeys_.length > flowLevel_ &&
!possibleSimpleKeys_[flowLevel_].isNull)
{ {
auto key = possibleSimpleKeys_[flowLevel_]; auto key = possibleSimpleKeys_[flowLevel_];
possibleSimpleKeys_.remove(flowLevel_); possibleSimpleKeys_[flowLevel_].isNull = true;
Mark keyMark = Mark(key.line, key.column); Mark keyMark = Mark(key.line, key.column);
auto idx = key.tokenIndex - tokensTaken_; auto idx = key.tokenIndex - tokensTaken_;
@ -1242,7 +1262,10 @@ final class Scanner
{ {
dchar c = reader_.peek(); dchar c = reader_.peek();
uint length = 0; uint length = 0;
while(!(" \t\0\n\r\u0085\u2028\u2029\'\"\\"d.canFind(c)))
mixin FastCharSearch!" \t\0\n\r\u0085\u2028\u2029\'\"\\"d search;
while(!search.canFind(c))
{ {
++length; ++length;
c = reader_.peek(length); c = reader_.peek(length);