remove fastcharsearch
This commit is contained in:
parent
993dd6c5bc
commit
cd88b7ad17
|
@ -18,7 +18,6 @@ dyaml_src = [
|
|||
'source/dyaml/escapes.d',
|
||||
'source/dyaml/event.d',
|
||||
'source/dyaml/exception.d',
|
||||
'source/dyaml/fastcharsearch.d',
|
||||
'source/dyaml/flags.d',
|
||||
'source/dyaml/hacks.d',
|
||||
'source/dyaml/linebreak.d',
|
||||
|
|
|
@ -29,7 +29,6 @@ import dyaml.encoding;
|
|||
import dyaml.escapes;
|
||||
import dyaml.event;
|
||||
import dyaml.exception;
|
||||
import dyaml.fastcharsearch;
|
||||
import dyaml.flags;
|
||||
import dyaml.linebreak;
|
||||
import dyaml.queue;
|
||||
|
@ -63,8 +62,11 @@ struct ScalarAnalysis
|
|||
|
||||
private alias isNewLine = among!('\n', '\u0085', '\u2028', '\u2029');
|
||||
|
||||
// override the canFind added by the FastCharSearch mixins
|
||||
private alias canFind = std.algorithm.canFind;
|
||||
private alias isSpecialChar = among!('#', ',', '[', ']', '{', '}', '&', '*', '!', '|', '>', '\\', '\'', '"', '%', '@', '`');
|
||||
|
||||
private alias isFlowIndicator = among!(',', '?', '[', ']', '{', '}');
|
||||
|
||||
private alias isSpace = among!('\0', '\n', '\r', '\u0085', '\u2028', '\u2029', ' ', '\t');
|
||||
|
||||
//Emits YAML events into a file/stream.
|
||||
struct Emitter
|
||||
|
@ -1032,14 +1034,11 @@ struct Emitter
|
|||
|
||||
foreach(const size_t index, const dchar c; scalar)
|
||||
{
|
||||
mixin FastCharSearch!("#,[]{}&*!|>\'\"%@`"d, 128) specialCharSearch;
|
||||
mixin FastCharSearch!(",?[]{}"d, 128) flowIndicatorSearch;
|
||||
|
||||
//Check for indicators.
|
||||
if(index == 0)
|
||||
{
|
||||
//Leading indicators are special characters.
|
||||
if(specialCharSearch.canFind(c))
|
||||
if(c.isSpecialChar)
|
||||
{
|
||||
flowIndicators = blockIndicators = true;
|
||||
}
|
||||
|
@ -1056,7 +1055,7 @@ struct Emitter
|
|||
else
|
||||
{
|
||||
//Some indicators cannot appear within a scalar as well.
|
||||
if(flowIndicatorSearch.canFind(c)){flowIndicators = true;}
|
||||
if(c.isFlowIndicator){flowIndicators = true;}
|
||||
if(c == ':')
|
||||
{
|
||||
flowIndicators = true;
|
||||
|
@ -1099,11 +1098,10 @@ struct Emitter
|
|||
previousSpace = previousBreak = false;
|
||||
}
|
||||
|
||||
mixin FastCharSearch! "\0\n\r\u0085\u2028\u2029 \t"d spaceSearch;
|
||||
//Prepare for the next character.
|
||||
preceededByWhitespace = spaceSearch.canFind(c);
|
||||
preceededByWhitespace = c.isSpace != 0;
|
||||
followedByWhitespace = index + 2 >= scalar.length ||
|
||||
spaceSearch.canFind(scalar[index + 2]);
|
||||
scalar[index + 2].isSpace;
|
||||
}
|
||||
|
||||
with(analysis.flags)
|
||||
|
|
|
@ -1,95 +0,0 @@
|
|||
|
||||
// Copyright Ferdinand Majerech 2011.
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
module dyaml.fastcharsearch;
|
||||
|
||||
|
||||
import std.algorithm;
|
||||
import std.conv;
|
||||
|
||||
|
||||
package:
|
||||
|
||||
/**
|
||||
* Mixin used for fast searching for a character in string.
|
||||
*
|
||||
* Creates a lookup table to quickly determine if a character
|
||||
* is present in the string. Size of the lookup table is limited;
|
||||
* any characters not represented in the table will be checked
|
||||
* by ordinary equality comparison.
|
||||
*
|
||||
* Params: chars = String to search in.
|
||||
* tableSize = Maximum number of bytes used by the table.
|
||||
*
|
||||
* Generated method:
|
||||
* bool canFind(dchar c)
|
||||
*
|
||||
* Determines if a character is in the string.
|
||||
*/
|
||||
template FastCharSearch(dstring chars, uint tableSize = 256)
|
||||
{
|
||||
private mixin(searchCode!(chars, tableSize)());
|
||||
}
|
||||
|
||||
/// Generate the search table and the canFind method.
|
||||
string searchCode(dstring chars, uint tableSize)()
|
||||
{
|
||||
import std.string;
|
||||
|
||||
const tableSizeStr = tableSize.to!string;
|
||||
ubyte[tableSize] table;
|
||||
table[] = 0;
|
||||
|
||||
//Characters that don't fit in the table.
|
||||
dchar[] specialChars;
|
||||
|
||||
foreach(c; chars)
|
||||
{
|
||||
if(c < tableSize) { table[c] = 1; }
|
||||
else { specialChars ~= c; }
|
||||
}
|
||||
|
||||
string specialCharsCode()
|
||||
{
|
||||
return specialChars.map!(c => q{cast(uint)c == %s}.format(cast(uint)c)).join(q{ || });
|
||||
}
|
||||
|
||||
const caseInTable =
|
||||
q{
|
||||
if(c < %s)
|
||||
{
|
||||
return cast(immutable(bool))table_[c];
|
||||
}
|
||||
}.format(tableSize);
|
||||
|
||||
string code;
|
||||
if(tableSize)
|
||||
{
|
||||
code ~=
|
||||
q{
|
||||
static immutable ubyte[%s] table_ = [
|
||||
%s];
|
||||
}.format(tableSize, table[].map!(c => c ? q{true} : q{false}).join(q{, }));
|
||||
}
|
||||
code ~=
|
||||
q{
|
||||
bool canFind(const dchar c) @safe pure nothrow @nogc
|
||||
{
|
||||
%s
|
||||
|
||||
return %s;
|
||||
}
|
||||
}.format(tableSize ? caseInTable : "",
|
||||
specialChars.length ? specialCharsCode() : q{false});
|
||||
|
||||
return code;
|
||||
}
|
||||
|
||||
@safe unittest
|
||||
{
|
||||
mixin FastCharSearch!("+", 128) search;
|
||||
assert(search.canFind('+'));
|
||||
}
|
|
@ -23,11 +23,10 @@ import std.utf;
|
|||
|
||||
import tinyendian;
|
||||
|
||||
import dyaml.fastcharsearch;
|
||||
import dyaml.encoding;
|
||||
import dyaml.exception;
|
||||
|
||||
|
||||
alias isBreak = among!('\n', '\u0085', '\u2028', '\u2029');
|
||||
|
||||
package:
|
||||
|
||||
|
@ -301,8 +300,6 @@ final class Reader
|
|||
/// Params: length = Number of characters to move position forward.
|
||||
void forward(size_t length) @safe pure
|
||||
{
|
||||
mixin FastCharSearch!"\n\u0085\u2028\u2029"d search;
|
||||
|
||||
while(length > 0)
|
||||
{
|
||||
auto asciiToTake = min(upcomingASCII_, length);
|
||||
|
@ -339,7 +336,7 @@ final class Reader
|
|||
const c = decode(buffer_, bufferOffset_);
|
||||
|
||||
// New line. (can compare with '\n' without decoding since it's ASCII)
|
||||
if(search.canFind(c) || (c == '\r' && buffer_[bufferOffset_] != '\n'))
|
||||
if(c.isBreak || (c == '\r' && buffer_[bufferOffset_] != '\n'))
|
||||
{
|
||||
++line_;
|
||||
column_ = 0;
|
||||
|
@ -377,7 +374,6 @@ final class Reader
|
|||
}
|
||||
|
||||
// UTF-8
|
||||
mixin FastCharSearch!"\n\u0085\u2028\u2029"d search;
|
||||
assert(bufferOffset_ < buffer_.length,
|
||||
"Attempted to decode past the end of YAML buffer");
|
||||
assert(buffer_[bufferOffset_] >= 0x80,
|
||||
|
@ -386,7 +382,7 @@ final class Reader
|
|||
const c = decode(buffer_, bufferOffset_);
|
||||
|
||||
// New line. (can compare with '\n' without decoding since it's ASCII)
|
||||
if(search.canFind(c) || (c == '\r' && buffer_[bufferOffset_] != '\n'))
|
||||
if(c.isBreak || (c == '\r' && buffer_[bufferOffset_] != '\n'))
|
||||
{
|
||||
++line_;
|
||||
column_ = 0;
|
||||
|
|
|
@ -22,7 +22,6 @@ import std.typecons;
|
|||
import std.traits : Unqual;
|
||||
import std.utf;
|
||||
|
||||
import dyaml.fastcharsearch;
|
||||
import dyaml.escapes;
|
||||
import dyaml.exception;
|
||||
import dyaml.queue;
|
||||
|
@ -53,6 +52,21 @@ package:
|
|||
/// TAG(value)
|
||||
/// SCALAR(value, plain, style)
|
||||
|
||||
alias isBreak = among!('\0', '\n', '\r', '\u0085', '\u2028', '\u2029');
|
||||
|
||||
alias isBreakOrSpace = among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029');
|
||||
|
||||
alias isWhiteSpace = among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029');
|
||||
|
||||
alias isNonScalarStartCharacter = among!('-', '?', ':', ',', '[', ']', '{', '}', '#', '&', '*', '!', '|', '>', '\'', '"', '%', '@', '`', ' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029');
|
||||
|
||||
alias isURIChar = among!('-', ';', '/', '?', ':', '@', '&', '=', '+', '$', ',', '_', '.', '!', '~', '*', '\'', '(', ')', '[', ']', '%');
|
||||
|
||||
alias isNSChar = among!(' ', '\n', '\r', '\u0085', '\u2028', '\u2029');
|
||||
|
||||
alias isBChar = among!('\n', '\r', '\u0085', '\u2028', '\u2029');
|
||||
|
||||
alias isFlowScalarBreakSpace = among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029', '\'', '"', '\\');
|
||||
|
||||
/// Marked exception thrown at scanner errors.
|
||||
///
|
||||
|
@ -153,14 +167,6 @@ final class Scanner
|
|||
/// not.
|
||||
char[256] msgBuffer_;
|
||||
|
||||
/// Used to detect if a character is any whitespace plus '\0'
|
||||
mixin FastCharSearch!" \t\0\n\r\u0085\u2028\u2029"d searchAllWhitespace;
|
||||
/// Used to detect if a character is any line break plus '\0'
|
||||
mixin FastCharSearch!"\0\n\r\u0085\u2028\u2029"d searchAllBreaks;
|
||||
|
||||
/// Avoids compiler confusion of std.algorithm.canFind with FastCharSearch.
|
||||
alias canFind = std.algorithm.canFind;
|
||||
|
||||
public:
|
||||
/// Construct a Scanner using specified Reader.
|
||||
this(Reader reader) @safe nothrow
|
||||
|
@ -750,7 +756,7 @@ final class Scanner
|
|||
return reader_.column == 0 &&
|
||||
reader_.peekByte() == '-' &&
|
||||
reader_.prefix(3) == "---" &&
|
||||
searchAllWhitespace.canFind(reader_.peek(3));
|
||||
reader_.peek(3).isWhiteSpace;
|
||||
}
|
||||
|
||||
/// Check if the next token is DOCUMENT-END: ^ '...' (' '|'\n')
|
||||
|
@ -760,13 +766,13 @@ final class Scanner
|
|||
return reader_.column == 0 &&
|
||||
reader_.peekByte() == '.' &&
|
||||
reader_.prefix(3) == "..." &&
|
||||
searchAllWhitespace.canFind(reader_.peek(3));
|
||||
reader_.peek(3).isWhiteSpace;
|
||||
}
|
||||
|
||||
/// Check if the next token is BLOCK-ENTRY: '-' (' '|'\n')
|
||||
bool checkBlockEntry() @safe
|
||||
{
|
||||
return searchAllWhitespace.canFind(reader_.peek(1));
|
||||
return !!reader_.peek(1).isWhiteSpace;
|
||||
}
|
||||
|
||||
/// Check if the next token is KEY(flow context): '?'
|
||||
|
@ -774,7 +780,7 @@ final class Scanner
|
|||
/// or KEY(block context): '?' (' '|'\n')
|
||||
bool checkKey() @safe
|
||||
{
|
||||
return (flowLevel_ > 0 || searchAllWhitespace.canFind(reader_.peek(1)));
|
||||
return (flowLevel_ > 0 || reader_.peek(1).isWhiteSpace);
|
||||
}
|
||||
|
||||
/// Check if the next token is VALUE(flow context): ':'
|
||||
|
@ -782,7 +788,7 @@ final class Scanner
|
|||
/// or VALUE(block context): ':' (' '|'\n')
|
||||
bool checkValue() @safe
|
||||
{
|
||||
return flowLevel_ > 0 || searchAllWhitespace.canFind(reader_.peek(1));
|
||||
return flowLevel_ > 0 || reader_.peek(1).isWhiteSpace;
|
||||
}
|
||||
|
||||
/// Check if the next token is a plain scalar.
|
||||
|
@ -802,13 +808,11 @@ final class Scanner
|
|||
bool checkPlain() @safe
|
||||
{
|
||||
const c = reader_.peek();
|
||||
mixin FastCharSearch!"-?:,[]{}#&*!|>\'\"%@` \t\0\n\r\u0085\u2028\u2029"d
|
||||
searchPlainNotFirstChar;
|
||||
if(!searchPlainNotFirstChar.canFind(c))
|
||||
if(!c.isNonScalarStartCharacter)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
return !searchAllWhitespace.canFind(reader_.peek(1)) &&
|
||||
return !reader_.peek(1).isWhiteSpace &&
|
||||
(c == '-' || (flowLevel_ == 0 && (c == '?' || c == ':')));
|
||||
}
|
||||
|
||||
|
@ -844,7 +848,7 @@ final class Scanner
|
|||
/// Scan and throw away all characters until next line break.
|
||||
void scanToNextBreak() @safe
|
||||
{
|
||||
while(!searchAllBreaks.canFind(reader_.peek())) { reader_.forward(); }
|
||||
while(!reader_.peek().isBreak) { reader_.forward(); }
|
||||
}
|
||||
|
||||
/// Scan all characters until next line break.
|
||||
|
@ -854,7 +858,7 @@ final class Scanner
|
|||
void scanToNextBreakToSlice() @safe
|
||||
{
|
||||
uint length = 0;
|
||||
while(!searchAllBreaks.canFind(reader_.peek(length)))
|
||||
while(!reader_.peek(length).isBreak)
|
||||
{
|
||||
++length;
|
||||
}
|
||||
|
@ -1068,7 +1072,7 @@ final class Scanner
|
|||
{
|
||||
findNextNonSpace();
|
||||
if(reader_.peekByte() == '#') { scanToNextBreak(); }
|
||||
if(searchAllBreaks.canFind(reader_.peek()))
|
||||
if(reader_.peek().isBreak)
|
||||
{
|
||||
scanLineBreak();
|
||||
return;
|
||||
|
@ -1102,7 +1106,7 @@ final class Scanner
|
|||
char[] value = reader_.sliceBuilder.finish();
|
||||
if(error_) { return Token.init; }
|
||||
|
||||
if(!searchAllWhitespace.canFind(reader_.peek()) &&
|
||||
if(!reader_.peek().isWhiteSpace &&
|
||||
!"?:,]}%@"d.canFind(reader_.peekByte()))
|
||||
{
|
||||
enum anchorCtx = "While scanning an anchor";
|
||||
|
@ -1137,7 +1141,6 @@ final class Scanner
|
|||
// (slice) we will produce.
|
||||
uint handleEnd;
|
||||
|
||||
mixin FastCharSearch!" \0\n\r\u0085\u2028\u2029"d search;
|
||||
if(c == '<')
|
||||
{
|
||||
reader_.forward(2);
|
||||
|
@ -1153,7 +1156,7 @@ final class Scanner
|
|||
}
|
||||
reader_.forward();
|
||||
}
|
||||
else if(searchAllWhitespace.canFind(c))
|
||||
else if(c.isWhiteSpace)
|
||||
{
|
||||
reader_.forward();
|
||||
handleEnd = 0;
|
||||
|
@ -1164,7 +1167,7 @@ final class Scanner
|
|||
uint length = 1;
|
||||
bool useHandle = false;
|
||||
|
||||
while(!search.canFind(c))
|
||||
while(!c.isBreakOrSpace)
|
||||
{
|
||||
if(c == '!')
|
||||
{
|
||||
|
@ -1192,7 +1195,7 @@ final class Scanner
|
|||
if(error_) { return Token.init; }
|
||||
}
|
||||
|
||||
if(search.canFind(reader_.peek()))
|
||||
if(reader_.peek().isBreakOrSpace)
|
||||
{
|
||||
char[] slice = reader_.sliceBuilder.finish();
|
||||
return tagToken(startMark, reader_.mark, slice, handleEnd);
|
||||
|
@ -1431,7 +1434,7 @@ final class Scanner
|
|||
findNextNonSpace();
|
||||
if(reader_.peekByte()== '#') { scanToNextBreak(); }
|
||||
|
||||
if(searchAllBreaks.canFind(reader_.peek()))
|
||||
if(reader_.peek().isBreak)
|
||||
{
|
||||
scanLineBreak();
|
||||
return;
|
||||
|
@ -1523,8 +1526,6 @@ final class Scanner
|
|||
{
|
||||
dchar c = reader_.peek();
|
||||
|
||||
mixin FastCharSearch!" \t\0\n\r\u0085\u2028\u2029\'\"\\"d search;
|
||||
|
||||
size_t numCodePoints = 0;
|
||||
// This is an optimized way of writing:
|
||||
// while(!search.canFind(reader_.peek(numCodePoints))) { ++numCodePoints; }
|
||||
|
@ -1543,7 +1544,7 @@ final class Scanner
|
|||
{
|
||||
// slice is UTF-8 - need to decode
|
||||
const ch = slice[i] < 0x80 ? slice[i++] : decode(slice, i);
|
||||
if(search.canFind(ch)) { break outer; }
|
||||
if(ch.isFlowScalarBreakSpace) { break outer; }
|
||||
++numCodePoints;
|
||||
}
|
||||
oldSliceLength = slice.length;
|
||||
|
@ -1686,7 +1687,7 @@ final class Scanner
|
|||
// Instead of checking indentation, we check for document separators.
|
||||
const prefix = reader_.prefix(3);
|
||||
if((prefix == "---" || prefix == "...") &&
|
||||
searchAllWhitespace.canFind(reader_.peek(3)))
|
||||
reader_.peek(3).isWhiteSpace)
|
||||
{
|
||||
error("While scanning a quoted scalar", startMark,
|
||||
"found unexpected document separator", reader_.mark);
|
||||
|
@ -1738,8 +1739,8 @@ final class Scanner
|
|||
for(;;)
|
||||
{
|
||||
const cNext = reader_.peek(length + 1);
|
||||
if(searchAllWhitespace.canFind(c) ||
|
||||
(c == ':' && searchAllWhitespace.canFind(cNext)))
|
||||
if(c.isWhiteSpace ||
|
||||
(c == ':' && cNext.isWhiteSpace))
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
@ -1752,7 +1753,7 @@ final class Scanner
|
|||
for(;;)
|
||||
{
|
||||
c = reader_.peek(length);
|
||||
if(searchAllWhitespace.canFind(c) || ",:?[]{}"d.canFind(c))
|
||||
if(c.isWhiteSpace || ",:?[]{}"d.canFind(c))
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
@ -1762,7 +1763,7 @@ final class Scanner
|
|||
|
||||
// It's not clear what we should do with ':' in the flow context.
|
||||
if(flowLevel_ > 0 && c == ':' &&
|
||||
!searchAllWhitespace.canFind(reader_.peek(length + 1)) &&
|
||||
!reader_.peek(length + 1).isWhiteSpace &&
|
||||
!",[]{}"d.canFind(reader_.peek(length + 1)))
|
||||
{
|
||||
// This is an error; throw the slice away.
|
||||
|
@ -1818,10 +1819,7 @@ final class Scanner
|
|||
reader_.forward(length);
|
||||
|
||||
dchar c = reader_.peek();
|
||||
mixin FastCharSearch!" \n\r\u0085\u2028\u2029"d search;
|
||||
// No newline after the spaces (if any)
|
||||
// (Excluding ' ' so we can use the same FastCharSearch as below)
|
||||
if(!search.canFind(c) && c != ' ')
|
||||
if(!c.isNSChar)
|
||||
{
|
||||
// We have spaces, but no newline.
|
||||
if(whitespaces.length > 0) { reader_.sliceBuilder.write(whitespaces); }
|
||||
|
@ -1846,7 +1844,7 @@ final class Scanner
|
|||
alias Transaction = SliceBuilder.Transaction;
|
||||
auto transaction = Transaction(&reader_.sliceBuilder);
|
||||
if(lineBreak != '\n') { reader_.sliceBuilder.write(lineBreak); }
|
||||
while(search.canFind(reader_.peek()))
|
||||
while(reader_.peek().isNSChar)
|
||||
{
|
||||
if(reader_.peekByte() == ' ') { reader_.forward(); }
|
||||
else
|
||||
|
@ -1914,8 +1912,7 @@ final class Scanner
|
|||
const startLen = reader_.sliceBuilder.length;
|
||||
{
|
||||
uint length = 0;
|
||||
mixin FastCharSearch!"-;/?:@&=+$,_.!~*\'()[]%"d search;
|
||||
while(c.isAlphaNum || search.canFind(c))
|
||||
while(c.isAlphaNum || c.isURIChar)
|
||||
{
|
||||
if(c == '%')
|
||||
{
|
||||
|
|
Loading…
Reference in a new issue