From cd88b7ad17eaae0fa92c2df13b298e334ec35b21 Mon Sep 17 00:00:00 2001 From: Cameron Ross Date: Sat, 21 Apr 2018 01:52:58 -0300 Subject: [PATCH] remove fastcharsearch --- meson.build | 1 - source/dyaml/emitter.d | 20 ++++---- source/dyaml/fastcharsearch.d | 95 ----------------------------------- source/dyaml/reader.d | 10 ++-- source/dyaml/scanner.d | 81 ++++++++++++++--------------- 5 files changed, 51 insertions(+), 156 deletions(-) delete mode 100644 source/dyaml/fastcharsearch.d diff --git a/meson.build b/meson.build index 053912b..3590a1f 100644 --- a/meson.build +++ b/meson.build @@ -18,7 +18,6 @@ dyaml_src = [ 'source/dyaml/escapes.d', 'source/dyaml/event.d', 'source/dyaml/exception.d', - 'source/dyaml/fastcharsearch.d', 'source/dyaml/flags.d', 'source/dyaml/hacks.d', 'source/dyaml/linebreak.d', diff --git a/source/dyaml/emitter.d b/source/dyaml/emitter.d index 06dd4e8..e52769c 100644 --- a/source/dyaml/emitter.d +++ b/source/dyaml/emitter.d @@ -29,7 +29,6 @@ import dyaml.encoding; import dyaml.escapes; import dyaml.event; import dyaml.exception; -import dyaml.fastcharsearch; import dyaml.flags; import dyaml.linebreak; import dyaml.queue; @@ -63,8 +62,11 @@ struct ScalarAnalysis private alias isNewLine = among!('\n', '\u0085', '\u2028', '\u2029'); -// override the canFind added by the FastCharSearch mixins -private alias canFind = std.algorithm.canFind; +private alias isSpecialChar = among!('#', ',', '[', ']', '{', '}', '&', '*', '!', '|', '>', '\'', '"', '%', '@', '`'); /* leading indicator characters; same set as the removed FastCharSearch table "#,[]{}&*!|>\'\"%@`" (no backslash) */ + +private alias isFlowIndicator = among!(',', '?', '[', ']', '{', '}'); + +private alias isSpace = among!('\0', '\n', '\r', '\u0085', '\u2028', '\u2029', ' ', '\t'); //Emits YAML events into a file/stream. struct Emitter @@ -1032,14 +1034,11 @@ struct Emitter foreach(const size_t index, const dchar c; scalar) { - mixin FastCharSearch!("#,[]{}&*!|>\'\"%@`"d, 128) specialCharSearch; - mixin FastCharSearch!(",?[]{}"d, 128) flowIndicatorSearch; - //Check for indicators. 
if(index == 0) { //Leading indicators are special characters. - if(specialCharSearch.canFind(c)) + if(c.isSpecialChar) { flowIndicators = blockIndicators = true; } @@ -1056,7 +1055,7 @@ struct Emitter else { //Some indicators cannot appear within a scalar as well. - if(flowIndicatorSearch.canFind(c)){flowIndicators = true;} + if(c.isFlowIndicator){flowIndicators = true;} if(c == ':') { flowIndicators = true; @@ -1099,11 +1098,10 @@ struct Emitter previousSpace = previousBreak = false; } - mixin FastCharSearch! "\0\n\r\u0085\u2028\u2029 \t"d spaceSearch; //Prepare for the next character. - preceededByWhitespace = spaceSearch.canFind(c); + preceededByWhitespace = c.isSpace != 0; followedByWhitespace = index + 2 >= scalar.length || - spaceSearch.canFind(scalar[index + 2]); + scalar[index + 2].isSpace; } with(analysis.flags) diff --git a/source/dyaml/fastcharsearch.d b/source/dyaml/fastcharsearch.d deleted file mode 100644 index 8d524fd..0000000 --- a/source/dyaml/fastcharsearch.d +++ /dev/null @@ -1,95 +0,0 @@ - -// Copyright Ferdinand Majerech 2011. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -module dyaml.fastcharsearch; - - -import std.algorithm; -import std.conv; - - -package: - -/** - * Mixin used for fast searching for a character in string. - * - * Creates a lookup table to quickly determine if a character - * is present in the string. Size of the lookup table is limited; - * any characters not represented in the table will be checked - * by ordinary equality comparison. - * - * Params: chars = String to search in. - * tableSize = Maximum number of bytes used by the table. - * - * Generated method: - * bool canFind(dchar c) - * - * Determines if a character is in the string. 
- */ -template FastCharSearch(dstring chars, uint tableSize = 256) -{ - private mixin(searchCode!(chars, tableSize)()); -} - -/// Generate the search table and the canFind method. -string searchCode(dstring chars, uint tableSize)() -{ - import std.string; - - const tableSizeStr = tableSize.to!string; - ubyte[tableSize] table; - table[] = 0; - - //Characters that don't fit in the table. - dchar[] specialChars; - - foreach(c; chars) - { - if(c < tableSize) { table[c] = 1; } - else { specialChars ~= c; } - } - - string specialCharsCode() - { - return specialChars.map!(c => q{cast(uint)c == %s}.format(cast(uint)c)).join(q{ || }); - } - - const caseInTable = - q{ - if(c < %s) - { - return cast(immutable(bool))table_[c]; - } - }.format(tableSize); - - string code; - if(tableSize) - { - code ~= - q{ - static immutable ubyte[%s] table_ = [ - %s]; - }.format(tableSize, table[].map!(c => c ? q{true} : q{false}).join(q{, })); - } - code ~= - q{ - bool canFind(const dchar c) @safe pure nothrow @nogc - { - %s - - return %s; - } - }.format(tableSize ? caseInTable : "", - specialChars.length ? specialCharsCode() : q{false}); - - return code; -} - -@safe unittest -{ - mixin FastCharSearch!("+", 128) search; - assert(search.canFind('+')); -} diff --git a/source/dyaml/reader.d b/source/dyaml/reader.d index 725cd8b..4a34980 100644 --- a/source/dyaml/reader.d +++ b/source/dyaml/reader.d @@ -23,11 +23,10 @@ import std.utf; import tinyendian; -import dyaml.fastcharsearch; import dyaml.encoding; import dyaml.exception; - +alias isBreak = among!('\n', '\u0085', '\u2028', '\u2029'); package: @@ -301,8 +300,6 @@ final class Reader /// Params: length = Number of characters to move position forward. void forward(size_t length) @safe pure { - mixin FastCharSearch!"\n\u0085\u2028\u2029"d search; - while(length > 0) { auto asciiToTake = min(upcomingASCII_, length); @@ -339,7 +336,7 @@ final class Reader const c = decode(buffer_, bufferOffset_); // New line. 
(can compare with '\n' without decoding since it's ASCII) - if(search.canFind(c) || (c == '\r' && buffer_[bufferOffset_] != '\n')) + if(c.isBreak || (c == '\r' && buffer_[bufferOffset_] != '\n')) { ++line_; column_ = 0; @@ -377,7 +374,6 @@ final class Reader } // UTF-8 - mixin FastCharSearch!"\n\u0085\u2028\u2029"d search; assert(bufferOffset_ < buffer_.length, "Attempted to decode past the end of YAML buffer"); assert(buffer_[bufferOffset_] >= 0x80, @@ -386,7 +382,7 @@ final class Reader const c = decode(buffer_, bufferOffset_); // New line. (can compare with '\n' without decoding since it's ASCII) - if(search.canFind(c) || (c == '\r' && buffer_[bufferOffset_] != '\n')) + if(c.isBreak || (c == '\r' && buffer_[bufferOffset_] != '\n')) { ++line_; column_ = 0; diff --git a/source/dyaml/scanner.d b/source/dyaml/scanner.d index 7511a08..1babf99 100644 --- a/source/dyaml/scanner.d +++ b/source/dyaml/scanner.d @@ -22,7 +22,6 @@ import std.typecons; import std.traits : Unqual; import std.utf; -import dyaml.fastcharsearch; import dyaml.escapes; import dyaml.exception; import dyaml.queue; @@ -53,6 +52,21 @@ package: /// TAG(value) /// SCALAR(value, plain, style) +alias isBreak = among!('\0', '\n', '\r', '\u0085', '\u2028', '\u2029'); + +alias isBreakOrSpace = among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'); + +alias isWhiteSpace = among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'); + +alias isNonScalarStartCharacter = among!('-', '?', ':', ',', '[', ']', '{', '}', '#', '&', '*', '!', '|', '>', '\'', '"', '%', '@', '`', ' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'); + +alias isURIChar = among!('-', ';', '/', '?', ':', '@', '&', '=', '+', '$', ',', '_', '.', '!', '~', '*', '\'', '(', ')', '[', ']', '%'); + +alias isNSChar = among!(' ', '\n', '\r', '\u0085', '\u2028', '\u2029'); + +alias isBChar = among!('\n', '\r', '\u0085', '\u2028', '\u2029'); + +alias isFlowScalarBreakSpace = among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', 
'\u2029', '\'', '"', '\\'); /// Marked exception thrown at scanner errors. /// @@ -153,14 +167,6 @@ final class Scanner /// not. char[256] msgBuffer_; - /// Used to detect if a character is any whitespace plus '\0' - mixin FastCharSearch!" \t\0\n\r\u0085\u2028\u2029"d searchAllWhitespace; - /// Used to detect if a character is any line break plus '\0' - mixin FastCharSearch!"\0\n\r\u0085\u2028\u2029"d searchAllBreaks; - - /// Avoids compiler confusion of std.algorithm.canFind with FastCharSearch. - alias canFind = std.algorithm.canFind; - public: /// Construct a Scanner using specified Reader. this(Reader reader) @safe nothrow @@ -750,7 +756,7 @@ final class Scanner return reader_.column == 0 && reader_.peekByte() == '-' && reader_.prefix(3) == "---" && - searchAllWhitespace.canFind(reader_.peek(3)); + reader_.peek(3).isWhiteSpace; } /// Check if the next token is DOCUMENT-END: ^ '...' (' '|'\n') @@ -760,13 +766,13 @@ final class Scanner return reader_.column == 0 && reader_.peekByte() == '.' && reader_.prefix(3) == "..." && - searchAllWhitespace.canFind(reader_.peek(3)); + reader_.peek(3).isWhiteSpace; } /// Check if the next token is BLOCK-ENTRY: '-' (' '|'\n') bool checkBlockEntry() @safe { - return searchAllWhitespace.canFind(reader_.peek(1)); + return !!reader_.peek(1).isWhiteSpace; } /// Check if the next token is KEY(flow context): '?' @@ -774,7 +780,7 @@ final class Scanner /// or KEY(block context): '?' (' '|'\n') bool checkKey() @safe { - return (flowLevel_ > 0 || searchAllWhitespace.canFind(reader_.peek(1))); + return (flowLevel_ > 0 || reader_.peek(1).isWhiteSpace); } /// Check if the next token is VALUE(flow context): ':' @@ -782,7 +788,7 @@ final class Scanner /// or VALUE(block context): ':' (' '|'\n') bool checkValue() @safe { - return flowLevel_ > 0 || searchAllWhitespace.canFind(reader_.peek(1)); + return flowLevel_ > 0 || reader_.peek(1).isWhiteSpace; } /// Check if the next token is a plain scalar. 
@@ -802,13 +808,11 @@ final class Scanner bool checkPlain() @safe { const c = reader_.peek(); - mixin FastCharSearch!"-?:,[]{}#&*!|>\'\"%@` \t\0\n\r\u0085\u2028\u2029"d - searchPlainNotFirstChar; - if(!searchPlainNotFirstChar.canFind(c)) + if(!c.isNonScalarStartCharacter) { return true; } - return !searchAllWhitespace.canFind(reader_.peek(1)) && + return !reader_.peek(1).isWhiteSpace && (c == '-' || (flowLevel_ == 0 && (c == '?' || c == ':'))); } @@ -844,7 +848,7 @@ final class Scanner /// Scan and throw away all characters until next line break. void scanToNextBreak() @safe { - while(!searchAllBreaks.canFind(reader_.peek())) { reader_.forward(); } + while(!reader_.peek().isBreak) { reader_.forward(); } } /// Scan all characters until next line break. @@ -854,7 +858,7 @@ final class Scanner void scanToNextBreakToSlice() @safe { uint length = 0; - while(!searchAllBreaks.canFind(reader_.peek(length))) + while(!reader_.peek(length).isBreak) { ++length; } @@ -1068,7 +1072,7 @@ final class Scanner { findNextNonSpace(); if(reader_.peekByte() == '#') { scanToNextBreak(); } - if(searchAllBreaks.canFind(reader_.peek())) + if(reader_.peek().isBreak) { scanLineBreak(); return; @@ -1102,7 +1106,7 @@ final class Scanner char[] value = reader_.sliceBuilder.finish(); if(error_) { return Token.init; } - if(!searchAllWhitespace.canFind(reader_.peek()) && + if(!reader_.peek().isWhiteSpace && !"?:,]}%@"d.canFind(reader_.peekByte())) { enum anchorCtx = "While scanning an anchor"; @@ -1137,7 +1141,6 @@ final class Scanner // (slice) we will produce. uint handleEnd; - mixin FastCharSearch!" 
\0\n\r\u0085\u2028\u2029"d search; if(c == '<') { reader_.forward(2); @@ -1153,7 +1156,7 @@ final class Scanner } reader_.forward(); } - else if(searchAllWhitespace.canFind(c)) + else if(c.isWhiteSpace) { reader_.forward(); handleEnd = 0; @@ -1164,7 +1167,7 @@ final class Scanner uint length = 1; bool useHandle = false; - while(!search.canFind(c)) + while(!c.isBreakOrSpace) { if(c == '!') { @@ -1192,7 +1195,7 @@ final class Scanner if(error_) { return Token.init; } } - if(search.canFind(reader_.peek())) + if(reader_.peek().isBreakOrSpace) { char[] slice = reader_.sliceBuilder.finish(); return tagToken(startMark, reader_.mark, slice, handleEnd); @@ -1431,7 +1434,7 @@ final class Scanner findNextNonSpace(); if(reader_.peekByte()== '#') { scanToNextBreak(); } - if(searchAllBreaks.canFind(reader_.peek())) + if(reader_.peek().isBreak) { scanLineBreak(); return; @@ -1523,8 +1526,6 @@ final class Scanner { dchar c = reader_.peek(); - mixin FastCharSearch!" \t\0\n\r\u0085\u2028\u2029\'\"\\"d search; - size_t numCodePoints = 0; // This is an optimized way of writing: // while(!search.canFind(reader_.peek(numCodePoints))) { ++numCodePoints; } @@ -1543,7 +1544,7 @@ final class Scanner { // slice is UTF-8 - need to decode const ch = slice[i] < 0x80 ? slice[i++] : decode(slice, i); - if(search.canFind(ch)) { break outer; } + if(ch.isFlowScalarBreakSpace) { break outer; } ++numCodePoints; } oldSliceLength = slice.length; @@ -1686,7 +1687,7 @@ final class Scanner // Instead of checking indentation, we check for document separators. 
const prefix = reader_.prefix(3); if((prefix == "---" || prefix == "...") && - searchAllWhitespace.canFind(reader_.peek(3))) + reader_.peek(3).isWhiteSpace) { error("While scanning a quoted scalar", startMark, "found unexpected document separator", reader_.mark); @@ -1738,8 +1739,8 @@ final class Scanner for(;;) { const cNext = reader_.peek(length + 1); - if(searchAllWhitespace.canFind(c) || - (c == ':' && searchAllWhitespace.canFind(cNext))) + if(c.isWhiteSpace || + (c == ':' && cNext.isWhiteSpace)) { break; } @@ -1752,7 +1753,7 @@ final class Scanner for(;;) { c = reader_.peek(length); - if(searchAllWhitespace.canFind(c) || ",:?[]{}"d.canFind(c)) + if(c.isWhiteSpace || ",:?[]{}"d.canFind(c)) { break; } @@ -1762,7 +1763,7 @@ final class Scanner // It's not clear what we should do with ':' in the flow context. if(flowLevel_ > 0 && c == ':' && - !searchAllWhitespace.canFind(reader_.peek(length + 1)) && + !reader_.peek(length + 1).isWhiteSpace && !",[]{}"d.canFind(reader_.peek(length + 1))) { // This is an error; throw the slice away. @@ -1818,10 +1819,7 @@ final class Scanner reader_.forward(length); dchar c = reader_.peek(); - mixin FastCharSearch!" \n\r\u0085\u2028\u2029"d search; - // No newline after the spaces (if any) - // (Excluding ' ' so we can use the same FastCharSearch as below) - if(!search.canFind(c) && c != ' ') + if(!c.isNSChar) { // We have spaces, but no newline. 
if(whitespaces.length > 0) { reader_.sliceBuilder.write(whitespaces); } @@ -1846,7 +1844,7 @@ final class Scanner alias Transaction = SliceBuilder.Transaction; auto transaction = Transaction(&reader_.sliceBuilder); if(lineBreak != '\n') { reader_.sliceBuilder.write(lineBreak); } - while(search.canFind(reader_.peek())) + while(reader_.peek().isNSChar) { if(reader_.peekByte() == ' ') { reader_.forward(); } else @@ -1914,8 +1912,7 @@ final class Scanner const startLen = reader_.sliceBuilder.length; { uint length = 0; - mixin FastCharSearch!"-;/?:@&=+$,_.!~*\'()[]%"d search; - while(c.isAlphaNum || search.canFind(c)) + while(c.isAlphaNum || c.isURIChar) { if(c == '%') {