remove fastcharsearch

This commit is contained in:
Cameron Ross 2018-04-21 01:52:58 -03:00
parent 993dd6c5bc
commit cd88b7ad17
No known key found for this signature in database
GPG key ID: 777897D98DC91C54
5 changed files with 51 additions and 156 deletions

View file

@ -18,7 +18,6 @@ dyaml_src = [
'source/dyaml/escapes.d', 'source/dyaml/escapes.d',
'source/dyaml/event.d', 'source/dyaml/event.d',
'source/dyaml/exception.d', 'source/dyaml/exception.d',
'source/dyaml/fastcharsearch.d',
'source/dyaml/flags.d', 'source/dyaml/flags.d',
'source/dyaml/hacks.d', 'source/dyaml/hacks.d',
'source/dyaml/linebreak.d', 'source/dyaml/linebreak.d',

View file

@ -29,7 +29,6 @@ import dyaml.encoding;
import dyaml.escapes; import dyaml.escapes;
import dyaml.event; import dyaml.event;
import dyaml.exception; import dyaml.exception;
import dyaml.fastcharsearch;
import dyaml.flags; import dyaml.flags;
import dyaml.linebreak; import dyaml.linebreak;
import dyaml.queue; import dyaml.queue;
@ -63,8 +62,11 @@ struct ScalarAnalysis
private alias isNewLine = among!('\n', '\u0085', '\u2028', '\u2029'); private alias isNewLine = among!('\n', '\u0085', '\u2028', '\u2029');
// override the canFind added by the FastCharSearch mixins private alias isSpecialChar = among!('#', ',', '[', ']', '{', '}', '&', '*', '!', '|', '>', '\\', '\'', '"', '%', '@', '`');
private alias canFind = std.algorithm.canFind;
private alias isFlowIndicator = among!(',', '?', '[', ']', '{', '}');
private alias isSpace = among!('\0', '\n', '\r', '\u0085', '\u2028', '\u2029', ' ', '\t');
//Emits YAML events into a file/stream. //Emits YAML events into a file/stream.
struct Emitter struct Emitter
@ -1032,14 +1034,11 @@ struct Emitter
foreach(const size_t index, const dchar c; scalar) foreach(const size_t index, const dchar c; scalar)
{ {
mixin FastCharSearch!("#,[]{}&*!|>\'\"%@`"d, 128) specialCharSearch;
mixin FastCharSearch!(",?[]{}"d, 128) flowIndicatorSearch;
//Check for indicators. //Check for indicators.
if(index == 0) if(index == 0)
{ {
//Leading indicators are special characters. //Leading indicators are special characters.
if(specialCharSearch.canFind(c)) if(c.isSpecialChar)
{ {
flowIndicators = blockIndicators = true; flowIndicators = blockIndicators = true;
} }
@ -1056,7 +1055,7 @@ struct Emitter
else else
{ {
//Some indicators cannot appear within a scalar as well. //Some indicators cannot appear within a scalar as well.
if(flowIndicatorSearch.canFind(c)){flowIndicators = true;} if(c.isFlowIndicator){flowIndicators = true;}
if(c == ':') if(c == ':')
{ {
flowIndicators = true; flowIndicators = true;
@ -1099,11 +1098,10 @@ struct Emitter
previousSpace = previousBreak = false; previousSpace = previousBreak = false;
} }
mixin FastCharSearch! "\0\n\r\u0085\u2028\u2029 \t"d spaceSearch;
//Prepare for the next character. //Prepare for the next character.
preceededByWhitespace = spaceSearch.canFind(c); preceededByWhitespace = c.isSpace != 0;
followedByWhitespace = index + 2 >= scalar.length || followedByWhitespace = index + 2 >= scalar.length ||
spaceSearch.canFind(scalar[index + 2]); scalar[index + 2].isSpace;
} }
with(analysis.flags) with(analysis.flags)

View file

@ -1,95 +0,0 @@
// Copyright Ferdinand Majerech 2011.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
module dyaml.fastcharsearch;
import std.algorithm;
import std.conv;
package:
/**
 * Mixin template used for fast searching for a character in a fixed string.
 *
 * Mixes in the code produced by searchCode: a lookup table named `table_`
 * and a `canFind` function. Characters of `chars` whose code point is below
 * `tableSize` are answered from the table; any characters not represented
 * in the table are checked by ordinary equality comparison.
 * The mixed-in declarations are private.
 *
 * Params:  chars     = String to search in.
 *          tableSize = Number of entries in the lookup table (one ubyte each).
 *                      If 0, no table is generated and every character of
 *                      `chars` is checked by comparison.
 *
 * Generated method:
 *     bool canFind(const dchar c) @safe pure nothrow @nogc
 *
 *     Determines if a character is in the string.
 *
 * Example:
 *     mixin FastCharSearch!("+", 128) search;
 *     assert(search.canFind('+'));
 */
template FastCharSearch(dstring chars, uint tableSize = 256)
{
private mixin(searchCode!(chars, tableSize)());
}
/**
 * Generate the source code mixed in by FastCharSearch: an optional lookup
 * table named `table_` and the `canFind` function.
 *
 * Both arguments are template parameters, so the result can be evaluated at
 * compile time and passed straight to `mixin`.
 *
 * Params:  chars     = Characters the generated `canFind` must accept.
 *          tableSize = Number of entries in the lookup table. Characters whose
 *                      code point is below this value are answered from the
 *                      table; the remaining characters of `chars` are compared
 *                      one by one. If 0, no table is generated.
 *
 * Returns: D source code declaring `table_` (only when `tableSize` is non-zero)
 *          and `bool canFind(const dchar c) @safe pure nothrow @nogc`.
 */
string searchCode(dstring chars, uint tableSize)()
{
    import std.string;

    // Marks which code points below tableSize occur in chars.
    // ubyte elements are default-initialized to 0, so no explicit clearing is needed.
    ubyte[tableSize] table;
    // Characters that don't fit in the table.
    dchar[] specialChars;
    foreach(c; chars)
    {
        if(c < tableSize) { table[c] = 1; }
        else              { specialChars ~= c; }
    }

    // Chain of equality comparisons covering the characters outside the table.
    string specialCharsCode()
    {
        return specialChars.map!(c => q{cast(uint)c == %s}.format(cast(uint)c)).join(q{ || });
    }

    string code;
    // Table lookup emitted at the top of canFind; stays empty when there is no table.
    string caseInTable;
    if(tableSize)
    {
        code ~=
        q{
            static immutable ubyte[%s] table_ = [
            %s];
        }.format(tableSize, table[].map!(c => c ? q{true} : q{false}).join(q{, }));

        caseInTable =
        q{
            if(c < %s)
            {
                return cast(immutable(bool))table_[c];
            }
        }.format(tableSize);
    }

    code ~=
    q{
        bool canFind(const dchar c) @safe pure nothrow @nogc
        {
            %s
            return %s;
        }
    }.format(caseInTable,
             specialChars.length ? specialCharsCode() : q{false});

    return code;
}
@safe unittest
{
    // Smoke test: a character listed in the search string must be reported as found.
    mixin FastCharSearch!("+", 128) plusSearch;
    const found = plusSearch.canFind('+');
    assert(found);
}

View file

@ -23,11 +23,10 @@ import std.utf;
import tinyendian; import tinyendian;
import dyaml.fastcharsearch;
import dyaml.encoding; import dyaml.encoding;
import dyaml.exception; import dyaml.exception;
alias isBreak = among!('\n', '\u0085', '\u2028', '\u2029');
package: package:
@ -301,8 +300,6 @@ final class Reader
/// Params: length = Number of characters to move position forward. /// Params: length = Number of characters to move position forward.
void forward(size_t length) @safe pure void forward(size_t length) @safe pure
{ {
mixin FastCharSearch!"\n\u0085\u2028\u2029"d search;
while(length > 0) while(length > 0)
{ {
auto asciiToTake = min(upcomingASCII_, length); auto asciiToTake = min(upcomingASCII_, length);
@ -339,7 +336,7 @@ final class Reader
const c = decode(buffer_, bufferOffset_); const c = decode(buffer_, bufferOffset_);
// New line. (can compare with '\n' without decoding since it's ASCII) // New line. (can compare with '\n' without decoding since it's ASCII)
if(search.canFind(c) || (c == '\r' && buffer_[bufferOffset_] != '\n')) if(c.isBreak || (c == '\r' && buffer_[bufferOffset_] != '\n'))
{ {
++line_; ++line_;
column_ = 0; column_ = 0;
@ -377,7 +374,6 @@ final class Reader
} }
// UTF-8 // UTF-8
mixin FastCharSearch!"\n\u0085\u2028\u2029"d search;
assert(bufferOffset_ < buffer_.length, assert(bufferOffset_ < buffer_.length,
"Attempted to decode past the end of YAML buffer"); "Attempted to decode past the end of YAML buffer");
assert(buffer_[bufferOffset_] >= 0x80, assert(buffer_[bufferOffset_] >= 0x80,
@ -386,7 +382,7 @@ final class Reader
const c = decode(buffer_, bufferOffset_); const c = decode(buffer_, bufferOffset_);
// New line. (can compare with '\n' without decoding since it's ASCII) // New line. (can compare with '\n' without decoding since it's ASCII)
if(search.canFind(c) || (c == '\r' && buffer_[bufferOffset_] != '\n')) if(c.isBreak || (c == '\r' && buffer_[bufferOffset_] != '\n'))
{ {
++line_; ++line_;
column_ = 0; column_ = 0;

View file

@ -22,7 +22,6 @@ import std.typecons;
import std.traits : Unqual; import std.traits : Unqual;
import std.utf; import std.utf;
import dyaml.fastcharsearch;
import dyaml.escapes; import dyaml.escapes;
import dyaml.exception; import dyaml.exception;
import dyaml.queue; import dyaml.queue;
@ -53,6 +52,21 @@ package:
/// TAG(value) /// TAG(value)
/// SCALAR(value, plain, style) /// SCALAR(value, plain, style)
alias isBreak = among!('\0', '\n', '\r', '\u0085', '\u2028', '\u2029');
alias isBreakOrSpace = among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029');
alias isWhiteSpace = among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029');
alias isNonScalarStartCharacter = among!('-', '?', ':', ',', '[', ']', '{', '}', '#', '&', '*', '!', '|', '>', '\'', '"', '%', '@', '`', ' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029');
alias isURIChar = among!('-', ';', '/', '?', ':', '@', '&', '=', '+', '$', ',', '_', '.', '!', '~', '*', '\'', '(', ')', '[', ']', '%');
alias isNSChar = among!(' ', '\n', '\r', '\u0085', '\u2028', '\u2029');
alias isBChar = among!('\n', '\r', '\u0085', '\u2028', '\u2029');
alias isFlowScalarBreakSpace = among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029', '\'', '"', '\\');
/// Marked exception thrown at scanner errors. /// Marked exception thrown at scanner errors.
/// ///
@ -153,14 +167,6 @@ final class Scanner
/// not. /// not.
char[256] msgBuffer_; char[256] msgBuffer_;
/// Used to detect if a character is any whitespace plus '\0'
mixin FastCharSearch!" \t\0\n\r\u0085\u2028\u2029"d searchAllWhitespace;
/// Used to detect if a character is any line break plus '\0'
mixin FastCharSearch!"\0\n\r\u0085\u2028\u2029"d searchAllBreaks;
/// Avoids compiler confusion of std.algorithm.canFind with FastCharSearch.
alias canFind = std.algorithm.canFind;
public: public:
/// Construct a Scanner using specified Reader. /// Construct a Scanner using specified Reader.
this(Reader reader) @safe nothrow this(Reader reader) @safe nothrow
@ -750,7 +756,7 @@ final class Scanner
return reader_.column == 0 && return reader_.column == 0 &&
reader_.peekByte() == '-' && reader_.peekByte() == '-' &&
reader_.prefix(3) == "---" && reader_.prefix(3) == "---" &&
searchAllWhitespace.canFind(reader_.peek(3)); reader_.peek(3).isWhiteSpace;
} }
/// Check if the next token is DOCUMENT-END: ^ '...' (' '|'\n') /// Check if the next token is DOCUMENT-END: ^ '...' (' '|'\n')
@ -760,13 +766,13 @@ final class Scanner
return reader_.column == 0 && return reader_.column == 0 &&
reader_.peekByte() == '.' && reader_.peekByte() == '.' &&
reader_.prefix(3) == "..." && reader_.prefix(3) == "..." &&
searchAllWhitespace.canFind(reader_.peek(3)); reader_.peek(3).isWhiteSpace;
} }
/// Check if the next token is BLOCK-ENTRY: '-' (' '|'\n') /// Check if the next token is BLOCK-ENTRY: '-' (' '|'\n')
bool checkBlockEntry() @safe bool checkBlockEntry() @safe
{ {
return searchAllWhitespace.canFind(reader_.peek(1)); return !!reader_.peek(1).isWhiteSpace;
} }
/// Check if the next token is KEY(flow context): '?' /// Check if the next token is KEY(flow context): '?'
@ -774,7 +780,7 @@ final class Scanner
/// or KEY(block context): '?' (' '|'\n') /// or KEY(block context): '?' (' '|'\n')
bool checkKey() @safe bool checkKey() @safe
{ {
return (flowLevel_ > 0 || searchAllWhitespace.canFind(reader_.peek(1))); return (flowLevel_ > 0 || reader_.peek(1).isWhiteSpace);
} }
/// Check if the next token is VALUE(flow context): ':' /// Check if the next token is VALUE(flow context): ':'
@ -782,7 +788,7 @@ final class Scanner
/// or VALUE(block context): ':' (' '|'\n') /// or VALUE(block context): ':' (' '|'\n')
bool checkValue() @safe bool checkValue() @safe
{ {
return flowLevel_ > 0 || searchAllWhitespace.canFind(reader_.peek(1)); return flowLevel_ > 0 || reader_.peek(1).isWhiteSpace;
} }
/// Check if the next token is a plain scalar. /// Check if the next token is a plain scalar.
@ -802,13 +808,11 @@ final class Scanner
bool checkPlain() @safe bool checkPlain() @safe
{ {
const c = reader_.peek(); const c = reader_.peek();
mixin FastCharSearch!"-?:,[]{}#&*!|>\'\"%@` \t\0\n\r\u0085\u2028\u2029"d if(!c.isNonScalarStartCharacter)
searchPlainNotFirstChar;
if(!searchPlainNotFirstChar.canFind(c))
{ {
return true; return true;
} }
return !searchAllWhitespace.canFind(reader_.peek(1)) && return !reader_.peek(1).isWhiteSpace &&
(c == '-' || (flowLevel_ == 0 && (c == '?' || c == ':'))); (c == '-' || (flowLevel_ == 0 && (c == '?' || c == ':')));
} }
@ -844,7 +848,7 @@ final class Scanner
/// Scan and throw away all characters until next line break. /// Scan and throw away all characters until next line break.
void scanToNextBreak() @safe void scanToNextBreak() @safe
{ {
while(!searchAllBreaks.canFind(reader_.peek())) { reader_.forward(); } while(!reader_.peek().isBreak) { reader_.forward(); }
} }
/// Scan all characters until next line break. /// Scan all characters until next line break.
@ -854,7 +858,7 @@ final class Scanner
void scanToNextBreakToSlice() @safe void scanToNextBreakToSlice() @safe
{ {
uint length = 0; uint length = 0;
while(!searchAllBreaks.canFind(reader_.peek(length))) while(!reader_.peek(length).isBreak)
{ {
++length; ++length;
} }
@ -1068,7 +1072,7 @@ final class Scanner
{ {
findNextNonSpace(); findNextNonSpace();
if(reader_.peekByte() == '#') { scanToNextBreak(); } if(reader_.peekByte() == '#') { scanToNextBreak(); }
if(searchAllBreaks.canFind(reader_.peek())) if(reader_.peek().isBreak)
{ {
scanLineBreak(); scanLineBreak();
return; return;
@ -1102,7 +1106,7 @@ final class Scanner
char[] value = reader_.sliceBuilder.finish(); char[] value = reader_.sliceBuilder.finish();
if(error_) { return Token.init; } if(error_) { return Token.init; }
if(!searchAllWhitespace.canFind(reader_.peek()) && if(!reader_.peek().isWhiteSpace &&
!"?:,]}%@"d.canFind(reader_.peekByte())) !"?:,]}%@"d.canFind(reader_.peekByte()))
{ {
enum anchorCtx = "While scanning an anchor"; enum anchorCtx = "While scanning an anchor";
@ -1137,7 +1141,6 @@ final class Scanner
// (slice) we will produce. // (slice) we will produce.
uint handleEnd; uint handleEnd;
mixin FastCharSearch!" \0\n\r\u0085\u2028\u2029"d search;
if(c == '<') if(c == '<')
{ {
reader_.forward(2); reader_.forward(2);
@ -1153,7 +1156,7 @@ final class Scanner
} }
reader_.forward(); reader_.forward();
} }
else if(searchAllWhitespace.canFind(c)) else if(c.isWhiteSpace)
{ {
reader_.forward(); reader_.forward();
handleEnd = 0; handleEnd = 0;
@ -1164,7 +1167,7 @@ final class Scanner
uint length = 1; uint length = 1;
bool useHandle = false; bool useHandle = false;
while(!search.canFind(c)) while(!c.isBreakOrSpace)
{ {
if(c == '!') if(c == '!')
{ {
@ -1192,7 +1195,7 @@ final class Scanner
if(error_) { return Token.init; } if(error_) { return Token.init; }
} }
if(search.canFind(reader_.peek())) if(reader_.peek().isBreakOrSpace)
{ {
char[] slice = reader_.sliceBuilder.finish(); char[] slice = reader_.sliceBuilder.finish();
return tagToken(startMark, reader_.mark, slice, handleEnd); return tagToken(startMark, reader_.mark, slice, handleEnd);
@ -1431,7 +1434,7 @@ final class Scanner
findNextNonSpace(); findNextNonSpace();
if(reader_.peekByte()== '#') { scanToNextBreak(); } if(reader_.peekByte()== '#') { scanToNextBreak(); }
if(searchAllBreaks.canFind(reader_.peek())) if(reader_.peek().isBreak)
{ {
scanLineBreak(); scanLineBreak();
return; return;
@ -1523,8 +1526,6 @@ final class Scanner
{ {
dchar c = reader_.peek(); dchar c = reader_.peek();
mixin FastCharSearch!" \t\0\n\r\u0085\u2028\u2029\'\"\\"d search;
size_t numCodePoints = 0; size_t numCodePoints = 0;
// This is an optimized way of writing: // This is an optimized way of writing:
// while(!search.canFind(reader_.peek(numCodePoints))) { ++numCodePoints; } // while(!search.canFind(reader_.peek(numCodePoints))) { ++numCodePoints; }
@ -1543,7 +1544,7 @@ final class Scanner
{ {
// slice is UTF-8 - need to decode // slice is UTF-8 - need to decode
const ch = slice[i] < 0x80 ? slice[i++] : decode(slice, i); const ch = slice[i] < 0x80 ? slice[i++] : decode(slice, i);
if(search.canFind(ch)) { break outer; } if(ch.isFlowScalarBreakSpace) { break outer; }
++numCodePoints; ++numCodePoints;
} }
oldSliceLength = slice.length; oldSliceLength = slice.length;
@ -1686,7 +1687,7 @@ final class Scanner
// Instead of checking indentation, we check for document separators. // Instead of checking indentation, we check for document separators.
const prefix = reader_.prefix(3); const prefix = reader_.prefix(3);
if((prefix == "---" || prefix == "...") && if((prefix == "---" || prefix == "...") &&
searchAllWhitespace.canFind(reader_.peek(3))) reader_.peek(3).isWhiteSpace)
{ {
error("While scanning a quoted scalar", startMark, error("While scanning a quoted scalar", startMark,
"found unexpected document separator", reader_.mark); "found unexpected document separator", reader_.mark);
@ -1738,8 +1739,8 @@ final class Scanner
for(;;) for(;;)
{ {
const cNext = reader_.peek(length + 1); const cNext = reader_.peek(length + 1);
if(searchAllWhitespace.canFind(c) || if(c.isWhiteSpace ||
(c == ':' && searchAllWhitespace.canFind(cNext))) (c == ':' && cNext.isWhiteSpace))
{ {
break; break;
} }
@ -1752,7 +1753,7 @@ final class Scanner
for(;;) for(;;)
{ {
c = reader_.peek(length); c = reader_.peek(length);
if(searchAllWhitespace.canFind(c) || ",:?[]{}"d.canFind(c)) if(c.isWhiteSpace || ",:?[]{}"d.canFind(c))
{ {
break; break;
} }
@ -1762,7 +1763,7 @@ final class Scanner
// It's not clear what we should do with ':' in the flow context. // It's not clear what we should do with ':' in the flow context.
if(flowLevel_ > 0 && c == ':' && if(flowLevel_ > 0 && c == ':' &&
!searchAllWhitespace.canFind(reader_.peek(length + 1)) && !reader_.peek(length + 1).isWhiteSpace &&
!",[]{}"d.canFind(reader_.peek(length + 1))) !",[]{}"d.canFind(reader_.peek(length + 1)))
{ {
// This is an error; throw the slice away. // This is an error; throw the slice away.
@ -1818,10 +1819,7 @@ final class Scanner
reader_.forward(length); reader_.forward(length);
dchar c = reader_.peek(); dchar c = reader_.peek();
mixin FastCharSearch!" \n\r\u0085\u2028\u2029"d search; if(!c.isNSChar)
// No newline after the spaces (if any)
// (Excluding ' ' so we can use the same FastCharSearch as below)
if(!search.canFind(c) && c != ' ')
{ {
// We have spaces, but no newline. // We have spaces, but no newline.
if(whitespaces.length > 0) { reader_.sliceBuilder.write(whitespaces); } if(whitespaces.length > 0) { reader_.sliceBuilder.write(whitespaces); }
@ -1846,7 +1844,7 @@ final class Scanner
alias Transaction = SliceBuilder.Transaction; alias Transaction = SliceBuilder.Transaction;
auto transaction = Transaction(&reader_.sliceBuilder); auto transaction = Transaction(&reader_.sliceBuilder);
if(lineBreak != '\n') { reader_.sliceBuilder.write(lineBreak); } if(lineBreak != '\n') { reader_.sliceBuilder.write(lineBreak); }
while(search.canFind(reader_.peek())) while(reader_.peek().isNSChar)
{ {
if(reader_.peekByte() == ' ') { reader_.forward(); } if(reader_.peekByte() == ' ') { reader_.forward(); }
else else
@ -1914,8 +1912,7 @@ final class Scanner
const startLen = reader_.sliceBuilder.length; const startLen = reader_.sliceBuilder.length;
{ {
uint length = 0; uint length = 0;
mixin FastCharSearch!"-;/?:@&=+$,_.!~*\'()[]%"d search; while(c.isAlphaNum || c.isURIChar)
while(c.isAlphaNum || search.canFind(c))
{ {
if(c == '%') if(c == '%')
{ {