remove fastcharsearch

This commit is contained in:
Cameron Ross 2018-04-21 01:52:58 -03:00
parent 993dd6c5bc
commit cd88b7ad17
No known key found for this signature in database
GPG key ID: 777897D98DC91C54
5 changed files with 51 additions and 156 deletions

View file

@ -18,7 +18,6 @@ dyaml_src = [
'source/dyaml/escapes.d',
'source/dyaml/event.d',
'source/dyaml/exception.d',
'source/dyaml/fastcharsearch.d',
'source/dyaml/flags.d',
'source/dyaml/hacks.d',
'source/dyaml/linebreak.d',

View file

@ -29,7 +29,6 @@ import dyaml.encoding;
import dyaml.escapes;
import dyaml.event;
import dyaml.exception;
import dyaml.fastcharsearch;
import dyaml.flags;
import dyaml.linebreak;
import dyaml.queue;
@ -63,8 +62,11 @@ struct ScalarAnalysis
// Line-break characters other than CR: LF, NEL (U+0085), LS (U+2028) and PS (U+2029).
// `among` yields a non-zero value when its argument equals one of the listed characters, 0 otherwise.
private alias isNewLine = among!('\n', '\u0085', '\u2028', '\u2029');
// Makes an unqualified `canFind` resolve to std.algorithm.canFind. It was introduced to override the
// canFind methods added by FastCharSearch mixins.
// NOTE(review): the FastCharSearch mixins are being removed from this module - confirm whether this
// alias is still required.
private alias canFind = std.algorithm.canFind;
// Indicator characters that are special when they are the first character of a scalar
// (checked at index == 0 in the scalar analysis loop, where a match sets both
// flowIndicators and blockIndicators).
// The set is exactly the characters of the lookup string "#,[]{}&*!|>\'\"%@`" that this alias
// replaces: in that string \' and \" are escapes for the two quote characters, so it contains
// no backslash and '\\' is intentionally not listed here.
// Only the zero / non-zero result of `among` is meaningful to callers.
private alias isSpecialChar = among!('#', ',', '[', ']', '{', '}', '&', '*', '!', '|', '>', '\'', '"', '%', '@', '`');
// Characters that set flowIndicators when found after the first character of a scalar:
// ',', '?', '[', ']', '{' and '}'. Non-zero result means "is one of these".
private alias isFlowIndicator = among!(',', '?', '[', ']', '{', '}');
// Whitespace as seen by the scalar analysis: '\0', the line breaks LF, CR, NEL (U+0085),
// LS (U+2028), PS (U+2029), plus space and tab. Used to compute the
// preceededByWhitespace / followedByWhitespace state for each character.
private alias isSpace = among!('\0', '\n', '\r', '\u0085', '\u2028', '\u2029', ' ', '\t');
//Emits YAML events into a file/stream.
struct Emitter
@ -1032,14 +1034,11 @@ struct Emitter
foreach(const size_t index, const dchar c; scalar)
{
mixin FastCharSearch!("#,[]{}&*!|>\'\"%@`"d, 128) specialCharSearch;
mixin FastCharSearch!(",?[]{}"d, 128) flowIndicatorSearch;
//Check for indicators.
if(index == 0)
{
//Leading indicators are special characters.
if(specialCharSearch.canFind(c))
if(c.isSpecialChar)
{
flowIndicators = blockIndicators = true;
}
@ -1056,7 +1055,7 @@ struct Emitter
else
{
//Some indicators cannot appear within a scalar as well.
if(flowIndicatorSearch.canFind(c)){flowIndicators = true;}
if(c.isFlowIndicator){flowIndicators = true;}
if(c == ':')
{
flowIndicators = true;
@ -1099,11 +1098,10 @@ struct Emitter
previousSpace = previousBreak = false;
}
mixin FastCharSearch! "\0\n\r\u0085\u2028\u2029 \t"d spaceSearch;
//Prepare for the next character.
preceededByWhitespace = spaceSearch.canFind(c);
preceededByWhitespace = c.isSpace != 0;
followedByWhitespace = index + 2 >= scalar.length ||
spaceSearch.canFind(scalar[index + 2]);
scalar[index + 2].isSpace;
}
with(analysis.flags)

View file

@ -1,95 +0,0 @@
// Copyright Ferdinand Majerech 2011.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
module dyaml.fastcharsearch;
import std.algorithm;
import std.conv;
package:
/**
* Mixin used for fast searching for a character in string.
*
* Creates a lookup table to quickly determine if a character
* is present in the string. Size of the lookup table is limited;
* any characters not represented in the table (code point >= tableSize)
* will be checked by ordinary equality comparison, one comparison per
* such character.
*
* The code is produced at compile time by searchCode and mixed in privately.
*
* Params: chars = String to search in.
* tableSize = Number of entries in the table (one ubyte each), i.e. the
* maximum number of bytes used by the table. Defaults to 256.
* With 0, no table is generated at all.
*
* Generated method:
* bool canFind(const dchar c) @safe pure nothrow @nogc
*
* Determines if a character is in the string.
*/
template FastCharSearch(dstring chars, uint tableSize = 256)
{
// searchCode returns D source text; mixing it in declares table_ (when tableSize != 0) and canFind.
private mixin(searchCode!(chars, tableSize)());
}
/// Generate the search table and the canFind method.
///
/// Returns D source code, as a string, that FastCharSearch mixes in. The code declares
/// a static immutable lookup table `table_` (only when tableSize is non-zero) and
/// a `canFind(const dchar c)` function that consults the table for c < tableSize and
/// otherwise compares c against each character that did not fit in the table.
///
/// Params: chars = Characters that canFind should report as found.
///         tableSize = Number of entries in the generated lookup table.
string searchCode(dstring chars, uint tableSize)()
{
import std.string;
// NOTE(review): tableSizeStr is not referenced anywhere else in this function.
const tableSizeStr = tableSize.to!string;
// One entry per code point below tableSize; 1 marks a character present in chars.
ubyte[tableSize] table;
table[] = 0;
//Characters that don't fit in the table.
dchar[] specialChars;
foreach(c; chars)
{
if(c < tableSize) { table[c] = 1; }
else { specialChars ~= c; }
}
// Builds the fallback expression: one `cast(uint)c == N` comparison per out-of-table
// character, joined with ||.
string specialCharsCode()
{
return specialChars.map!(c => q{cast(uint)c == %s}.format(cast(uint)c)).join(q{ || });
}
// Fast path of the generated canFind: a direct table lookup for c < tableSize.
// Formatted unconditionally, but only emitted below when tableSize is non-zero.
const caseInTable =
q{
if(c < %s)
{
return cast(immutable(bool))table_[c];
}
}.format(tableSize);
string code;
// Emit the table declaration; its initializer is written with true/false literals,
// one per table entry.
if(tableSize)
{
code ~=
q{
static immutable ubyte[%s] table_ = [
%s];
}.format(tableSize, table[].map!(c => c ? q{true} : q{false}).join(q{, }));
}
// Emit canFind: the optional table lookup first, then the fallback comparisons
// (or a plain `false` when every character fit in the table).
code ~=
q{
bool canFind(const dchar c) @safe pure nothrow @nogc
{
%s
return %s;
}
}.format(tableSize ? caseInTable : "",
specialChars.length ? specialCharsCode() : q{false});
return code;
}
// Smoke test: a single character whose code point is below the 128-entry table size is
// reported as found through the named mixin.
@safe unittest
{
mixin FastCharSearch!("+", 128) search;
assert(search.canFind('+'));
}

View file

@ -23,11 +23,10 @@ import std.utf;
import tinyendian;
import dyaml.fastcharsearch;
import dyaml.encoding;
import dyaml.exception;
// Line breaks that Reader counts as a new line on their own: LF, NEL (U+0085), LS (U+2028)
// and PS (U+2029). CR is not in this set; the call sites test it separately, only counting a
// '\r' that is not followed by '\n'. Non-zero result means "is one of these".
// NOTE(review): this alias sits above the `package:` label, so it presumably gets the module's
// default visibility rather than package visibility - confirm that is intended.
alias isBreak = among!('\n', '\u0085', '\u2028', '\u2029');
package:
@ -301,8 +300,6 @@ final class Reader
/// Params: length = Number of characters to move position forward.
void forward(size_t length) @safe pure
{
mixin FastCharSearch!"\n\u0085\u2028\u2029"d search;
while(length > 0)
{
auto asciiToTake = min(upcomingASCII_, length);
@ -339,7 +336,7 @@ final class Reader
const c = decode(buffer_, bufferOffset_);
// New line. (can compare with '\n' without decoding since it's ASCII)
if(search.canFind(c) || (c == '\r' && buffer_[bufferOffset_] != '\n'))
if(c.isBreak || (c == '\r' && buffer_[bufferOffset_] != '\n'))
{
++line_;
column_ = 0;
@ -377,7 +374,6 @@ final class Reader
}
// UTF-8
mixin FastCharSearch!"\n\u0085\u2028\u2029"d search;
assert(bufferOffset_ < buffer_.length,
"Attempted to decode past the end of YAML buffer");
assert(buffer_[bufferOffset_] >= 0x80,
@ -386,7 +382,7 @@ final class Reader
const c = decode(buffer_, bufferOffset_);
// New line. (can compare with '\n' without decoding since it's ASCII)
if(search.canFind(c) || (c == '\r' && buffer_[bufferOffset_] != '\n'))
if(c.isBreak || (c == '\r' && buffer_[bufferOffset_] != '\n'))
{
++line_;
column_ = 0;

View file

@ -22,7 +22,6 @@ import std.typecons;
import std.traits : Unqual;
import std.utf;
import dyaml.fastcharsearch;
import dyaml.escapes;
import dyaml.exception;
import dyaml.queue;
@ -53,6 +52,21 @@ package:
/// TAG(value)
/// SCALAR(value, plain, style)
// Character-class predicates built on `among`. Each yields a non-zero value when its argument
// equals one of the listed characters and 0 otherwise, so call sites use the result as a boolean.

// Any line break plus '\0': '\0', LF, CR, NEL (U+0085), LS (U+2028), PS (U+2029).
// NOTE(review): the isBreak alias declared alongside class Reader uses a different set
// (no '\0', no CR) - keep the two apart when reading call sites.
alias isBreak = among!('\0', '\n', '\r', '\u0085', '\u2028', '\u2029');

// isBreak plus the space character (tab is not included).
alias isBreakOrSpace = among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029');

// Any whitespace plus '\0': space, tab and every character in isBreak.
alias isWhiteSpace = among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029');

// Characters for which checkPlain does not immediately accept the next token as a plain scalar:
// the indicator characters plus everything in isWhiteSpace.
alias isNonScalarStartCharacter = among!('-', '?', ':', ',', '[', ']', '{', '}', '#', '&', '*', '!', '|', '>', '\'', '"', '%', '@', '`', ' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029');

// Punctuation accepted, together with alphanumeric characters, while scanning a URI.
alias isURIChar = among!('-', ';', '/', '?', ':', '@', '&', '=', '+', '$', ',', '_', '.', '!', '~', '*', '\'', '(', ')', '[', ']', '%');

// Space plus the line breaks LF, CR, NEL, LS, PS (no tab, no '\0').
// NOTE(review): despite the "NS" in the name, this set consists of space and break characters.
alias isNSChar = among!(' ', '\n', '\r', '\u0085', '\u2028', '\u2029');

// Line breaks only: LF, CR, NEL, LS, PS.
// NOTE(review): no use of this alias is visible in the hunks of this change - confirm it is needed.
alias isBChar = among!('\n', '\r', '\u0085', '\u2028', '\u2029');

// Characters that end a run of ordinary characters in a flow (quoted) scalar:
// everything in isWhiteSpace plus both quote characters and the backslash.
alias isFlowScalarBreakSpace = among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029', '\'', '"', '\\');
/// Marked exception thrown at scanner errors.
///
@ -153,14 +167,6 @@ final class Scanner
/// not.
char[256] msgBuffer_;
/// Used to detect if a character is any whitespace plus '\0'
mixin FastCharSearch!" \t\0\n\r\u0085\u2028\u2029"d searchAllWhitespace;
/// Used to detect if a character is any line break plus '\0'
mixin FastCharSearch!"\0\n\r\u0085\u2028\u2029"d searchAllBreaks;
/// Avoids compiler confusion of std.algorithm.canFind with FastCharSearch.
alias canFind = std.algorithm.canFind;
public:
/// Construct a Scanner using specified Reader.
this(Reader reader) @safe nothrow
@ -750,7 +756,7 @@ final class Scanner
return reader_.column == 0 &&
reader_.peekByte() == '-' &&
reader_.prefix(3) == "---" &&
searchAllWhitespace.canFind(reader_.peek(3));
reader_.peek(3).isWhiteSpace;
}
/// Check if the next token is DOCUMENT-END: ^ '...' (' '|'\n')
@ -760,13 +766,13 @@ final class Scanner
return reader_.column == 0 &&
reader_.peekByte() == '.' &&
reader_.prefix(3) == "..." &&
searchAllWhitespace.canFind(reader_.peek(3));
reader_.peek(3).isWhiteSpace;
}
/// Check if the next token is BLOCK-ENTRY: '-' (' '|'\n')
bool checkBlockEntry() @safe
{
return searchAllWhitespace.canFind(reader_.peek(1));
return !!reader_.peek(1).isWhiteSpace;
}
/// Check if the next token is KEY(flow context): '?'
@ -774,7 +780,7 @@ final class Scanner
/// or KEY(block context): '?' (' '|'\n')
bool checkKey() @safe
{
return (flowLevel_ > 0 || searchAllWhitespace.canFind(reader_.peek(1)));
return (flowLevel_ > 0 || reader_.peek(1).isWhiteSpace);
}
/// Check if the next token is VALUE(flow context): ':'
@ -782,7 +788,7 @@ final class Scanner
/// or VALUE(block context): ':' (' '|'\n')
bool checkValue() @safe
{
return flowLevel_ > 0 || searchAllWhitespace.canFind(reader_.peek(1));
return flowLevel_ > 0 || reader_.peek(1).isWhiteSpace;
}
/// Check if the next token is a plain scalar.
@ -802,13 +808,11 @@ final class Scanner
bool checkPlain() @safe
{
const c = reader_.peek();
mixin FastCharSearch!"-?:,[]{}#&*!|>\'\"%@` \t\0\n\r\u0085\u2028\u2029"d
searchPlainNotFirstChar;
if(!searchPlainNotFirstChar.canFind(c))
if(!c.isNonScalarStartCharacter)
{
return true;
}
return !searchAllWhitespace.canFind(reader_.peek(1)) &&
return !reader_.peek(1).isWhiteSpace &&
(c == '-' || (flowLevel_ == 0 && (c == '?' || c == ':')));
}
@ -844,7 +848,7 @@ final class Scanner
/// Scan and throw away all characters until next line break.
void scanToNextBreak() @safe
{
while(!searchAllBreaks.canFind(reader_.peek())) { reader_.forward(); }
while(!reader_.peek().isBreak) { reader_.forward(); }
}
/// Scan all characters until next line break.
@ -854,7 +858,7 @@ final class Scanner
void scanToNextBreakToSlice() @safe
{
uint length = 0;
while(!searchAllBreaks.canFind(reader_.peek(length)))
while(!reader_.peek(length).isBreak)
{
++length;
}
@ -1068,7 +1072,7 @@ final class Scanner
{
findNextNonSpace();
if(reader_.peekByte() == '#') { scanToNextBreak(); }
if(searchAllBreaks.canFind(reader_.peek()))
if(reader_.peek().isBreak)
{
scanLineBreak();
return;
@ -1102,7 +1106,7 @@ final class Scanner
char[] value = reader_.sliceBuilder.finish();
if(error_) { return Token.init; }
if(!searchAllWhitespace.canFind(reader_.peek()) &&
if(!reader_.peek().isWhiteSpace &&
!"?:,]}%@"d.canFind(reader_.peekByte()))
{
enum anchorCtx = "While scanning an anchor";
@ -1137,7 +1141,6 @@ final class Scanner
// (slice) we will produce.
uint handleEnd;
mixin FastCharSearch!" \0\n\r\u0085\u2028\u2029"d search;
if(c == '<')
{
reader_.forward(2);
@ -1153,7 +1156,7 @@ final class Scanner
}
reader_.forward();
}
else if(searchAllWhitespace.canFind(c))
else if(c.isWhiteSpace)
{
reader_.forward();
handleEnd = 0;
@ -1164,7 +1167,7 @@ final class Scanner
uint length = 1;
bool useHandle = false;
while(!search.canFind(c))
while(!c.isBreakOrSpace)
{
if(c == '!')
{
@ -1192,7 +1195,7 @@ final class Scanner
if(error_) { return Token.init; }
}
if(search.canFind(reader_.peek()))
if(reader_.peek().isBreakOrSpace)
{
char[] slice = reader_.sliceBuilder.finish();
return tagToken(startMark, reader_.mark, slice, handleEnd);
@ -1431,7 +1434,7 @@ final class Scanner
findNextNonSpace();
if(reader_.peekByte()== '#') { scanToNextBreak(); }
if(searchAllBreaks.canFind(reader_.peek()))
if(reader_.peek().isBreak)
{
scanLineBreak();
return;
@ -1523,8 +1526,6 @@ final class Scanner
{
dchar c = reader_.peek();
mixin FastCharSearch!" \t\0\n\r\u0085\u2028\u2029\'\"\\"d search;
size_t numCodePoints = 0;
// This is an optimized way of writing:
// while(!reader_.peek(numCodePoints).isFlowScalarBreakSpace) { ++numCodePoints; }
@ -1543,7 +1544,7 @@ final class Scanner
{
// slice is UTF-8 - need to decode
const ch = slice[i] < 0x80 ? slice[i++] : decode(slice, i);
if(search.canFind(ch)) { break outer; }
if(ch.isFlowScalarBreakSpace) { break outer; }
++numCodePoints;
}
oldSliceLength = slice.length;
@ -1686,7 +1687,7 @@ final class Scanner
// Instead of checking indentation, we check for document separators.
const prefix = reader_.prefix(3);
if((prefix == "---" || prefix == "...") &&
searchAllWhitespace.canFind(reader_.peek(3)))
reader_.peek(3).isWhiteSpace)
{
error("While scanning a quoted scalar", startMark,
"found unexpected document separator", reader_.mark);
@ -1738,8 +1739,8 @@ final class Scanner
for(;;)
{
const cNext = reader_.peek(length + 1);
if(searchAllWhitespace.canFind(c) ||
(c == ':' && searchAllWhitespace.canFind(cNext)))
if(c.isWhiteSpace ||
(c == ':' && cNext.isWhiteSpace))
{
break;
}
@ -1752,7 +1753,7 @@ final class Scanner
for(;;)
{
c = reader_.peek(length);
if(searchAllWhitespace.canFind(c) || ",:?[]{}"d.canFind(c))
if(c.isWhiteSpace || ",:?[]{}"d.canFind(c))
{
break;
}
@ -1762,7 +1763,7 @@ final class Scanner
// It's not clear what we should do with ':' in the flow context.
if(flowLevel_ > 0 && c == ':' &&
!searchAllWhitespace.canFind(reader_.peek(length + 1)) &&
!reader_.peek(length + 1).isWhiteSpace &&
!",[]{}"d.canFind(reader_.peek(length + 1)))
{
// This is an error; throw the slice away.
@ -1818,10 +1819,7 @@ final class Scanner
reader_.forward(length);
dchar c = reader_.peek();
mixin FastCharSearch!" \n\r\u0085\u2028\u2029"d search;
// No newline after the spaces (if any)
// (Excluding ' ' so we can use the same FastCharSearch as below)
if(!search.canFind(c) && c != ' ')
if(!c.isNSChar)
{
// We have spaces, but no newline.
if(whitespaces.length > 0) { reader_.sliceBuilder.write(whitespaces); }
@ -1846,7 +1844,7 @@ final class Scanner
alias Transaction = SliceBuilder.Transaction;
auto transaction = Transaction(&reader_.sliceBuilder);
if(lineBreak != '\n') { reader_.sliceBuilder.write(lineBreak); }
while(search.canFind(reader_.peek()))
while(reader_.peek().isNSChar)
{
if(reader_.peekByte() == ' ') { reader_.forward(); }
else
@ -1914,8 +1912,7 @@ final class Scanner
const startLen = reader_.sliceBuilder.length;
{
uint length = 0;
mixin FastCharSearch!"-;/?:@&=+$,_.!~*\'()[]%"d search;
while(c.isAlphaNum || search.canFind(c))
while(c.isAlphaNum || c.isURIChar)
{
if(c == '%')
{