remove fastcharsearch

2018-04-21 01:52:58 -03:00 · 2018-04-21 01:52:58 -03:00 · cd88b7ad17
commit cd88b7ad17
parent 993dd6c5bc
5 changed files with 51 additions and 156 deletions
--- a/meson.build
+++ b/meson.build
@ -18,7 +18,6 @@ dyaml_src = [
    'source/dyaml/escapes.d',
    'source/dyaml/event.d',
    'source/dyaml/exception.d',
-    'source/dyaml/fastcharsearch.d',
    'source/dyaml/flags.d',
    'source/dyaml/hacks.d',
    'source/dyaml/linebreak.d',
--- a/source/dyaml/emitter.d
+++ b/source/dyaml/emitter.d
@ -29,7 +29,6 @@ import dyaml.encoding;
 import dyaml.escapes;
 import dyaml.event;
 import dyaml.exception;
-import dyaml.fastcharsearch;
 import dyaml.flags;
 import dyaml.linebreak;
 import dyaml.queue;
@ -63,8 +62,11 @@ struct ScalarAnalysis
 private alias isNewLine = among!('\n', '\u0085', '\u2028', '\u2029');
-// override the canFind added by the FastCharSearch mixins
+private alias isSpecialChar = among!('#', ',', '[', ']', '{', '}', '&', '*', '!', '|', '>', '\\', '\'', '"', '%', '@', '`');
-private alias canFind = std.algorithm.canFind;
+
+private alias isFlowIndicator = among!(',', '?', '[', ']', '{', '}');
+private alias isSpace = among!('\0', '\n', '\r', '\u0085', '\u2028', '\u2029', ' ', '\t');
 //Emits YAML events into a file/stream.
 struct Emitter
@ -1032,14 +1034,11 @@ struct Emitter
            foreach(const size_t index, const dchar c; scalar)
            {
-                mixin FastCharSearch!("#,[]{}&*!|>\'\"%@`"d, 128) specialCharSearch;
-                mixin FastCharSearch!(",?[]{}"d, 128) flowIndicatorSearch;
                //Check for indicators.
                if(index == 0)
                {
                    //Leading indicators are special characters.
-                    if(specialCharSearch.canFind(c))
+                    if(c.isSpecialChar)
                    {
                        flowIndicators = blockIndicators = true;
                    }
@ -1056,7 +1055,7 @@ struct Emitter
                else
                {
                    //Some indicators cannot appear within a scalar as well.
-                    if(flowIndicatorSearch.canFind(c)){flowIndicators = true;}
+                    if(c.isFlowIndicator){flowIndicators = true;}
                    if(c == ':')
                    {
                        flowIndicators = true;
@ -1099,11 +1098,10 @@ struct Emitter
                    previousSpace = previousBreak = false;
                }
-                mixin FastCharSearch! "\0\n\r\u0085\u2028\u2029 \t"d spaceSearch;
                //Prepare for the next character.
-                preceededByWhitespace = spaceSearch.canFind(c);
+                preceededByWhitespace = c.isSpace != 0;
                followedByWhitespace = index + 2 >= scalar.length ||
-                                       spaceSearch.canFind(scalar[index + 2]);
+                                       scalar[index + 2].isSpace;
            }
            with(analysis.flags)
--- a/source/dyaml/fastcharsearch.d
+++ b/source/dyaml/fastcharsearch.d
@ -1,95 +0,0 @@
-//          Copyright Ferdinand Majerech 2011.
-// Distributed under the Boost Software License, Version 1.0.
-//    (See accompanying file LICENSE_1_0.txt or copy at
-//          http://www.boost.org/LICENSE_1_0.txt)
-module dyaml.fastcharsearch;
-import std.algorithm;
-import std.conv;
-package:
-/**
- * Mixin used for fast searching for a character in string.
- *
- * Creates a lookup table to quickly determine if a character
- * is present in the string. Size of the lookup table is limited;
- * any characters not represented in the table will be checked
- * by ordinary equality comparison.
- *
- * Params:  chars     = String to search in.
- *          tableSize = Maximum number of bytes used by the table.
- *
- * Generated method:
- *     bool canFind(dchar c)
- *
- *     Determines if a character is in the string.
- */
-template FastCharSearch(dstring chars, uint tableSize = 256)
-{
-    private mixin(searchCode!(chars, tableSize)());
-}
-/// Generate the search table and the canFind method.
-string searchCode(dstring chars, uint tableSize)()
-{
-    import std.string;
-    const tableSizeStr = tableSize.to!string;
-    ubyte[tableSize] table;
-    table[] = 0;
-    //Characters that don't fit in the table.
-    dchar[] specialChars;
-    foreach(c; chars)
-    {
-        if(c < tableSize) { table[c] = 1; }
-        else              { specialChars ~= c; }
-    }
-    string specialCharsCode()
-    {
-        return specialChars.map!(c => q{cast(uint)c == %s}.format(cast(uint)c)).join(q{ || });
-    }
-    const caseInTable =
-    q{
-            if(c < %s)
-            {
-                return cast(immutable(bool))table_[c];
-            }
-    }.format(tableSize);
-    string code;
-    if(tableSize)
-    {
-        code ~=
-        q{
-            static immutable ubyte[%s] table_ = [
-            %s];
-        }.format(tableSize, table[].map!(c => c ? q{true} : q{false}).join(q{, }));
-    }
-    code ~=
-    q{
-        bool canFind(const dchar c) @safe pure nothrow @nogc
-        {
-            %s
-            return %s;
-        }
-    }.format(tableSize ? caseInTable : "",
-             specialChars.length ? specialCharsCode() : q{false});
-    return code;
-}
-@safe unittest
-{
-    mixin FastCharSearch!("+", 128) search;
-    assert(search.canFind('+'));
-}
--- a/source/dyaml/reader.d
+++ b/source/dyaml/reader.d
@ -23,11 +23,10 @@ import std.utf;
 import tinyendian;
-import dyaml.fastcharsearch;
 import dyaml.encoding;
 import dyaml.exception;
-
+alias isBreak = among!('\n', '\u0085', '\u2028', '\u2029');
 package:
@ -301,8 +300,6 @@ final class Reader
        /// Params:  length = Number of characters to move position forward.
        void forward(size_t length) @safe pure
        {
-            mixin FastCharSearch!"\n\u0085\u2028\u2029"d search;
            while(length > 0)
            {
                auto asciiToTake = min(upcomingASCII_, length);
@ -339,7 +336,7 @@ final class Reader
                const c = decode(buffer_, bufferOffset_);
                // New line. (can compare with '\n' without decoding since it's ASCII)
-                if(search.canFind(c) || (c == '\r' && buffer_[bufferOffset_] != '\n'))
+                if(c.isBreak || (c == '\r' && buffer_[bufferOffset_] != '\n'))
                {
                    ++line_;
                    column_ = 0;
@ -377,7 +374,6 @@ final class Reader
            }
            // UTF-8
-            mixin FastCharSearch!"\n\u0085\u2028\u2029"d search;
            assert(bufferOffset_ < buffer_.length,
                   "Attempted to decode past the end of YAML buffer");
            assert(buffer_[bufferOffset_] >= 0x80,
@ -386,7 +382,7 @@ final class Reader
            const c = decode(buffer_, bufferOffset_);
            // New line. (can compare with '\n' without decoding since it's ASCII)
-            if(search.canFind(c) || (c == '\r' && buffer_[bufferOffset_] != '\n'))
+            if(c.isBreak || (c == '\r' && buffer_[bufferOffset_] != '\n'))
            {
                ++line_;
                column_ = 0;
--- a/source/dyaml/scanner.d
+++ b/source/dyaml/scanner.d
@ -22,7 +22,6 @@ import std.typecons;
 import std.traits : Unqual;
 import std.utf;
-import dyaml.fastcharsearch;
 import dyaml.escapes;
 import dyaml.exception;
 import dyaml.queue;
@ -53,6 +52,21 @@ package:
 /// TAG(value)
 /// SCALAR(value, plain, style)
+alias isBreak = among!('\0', '\n', '\r', '\u0085', '\u2028', '\u2029');
+alias isBreakOrSpace = among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029');
+alias isWhiteSpace = among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029');
+alias isNonScalarStartCharacter = among!('-', '?', ':', ',', '[', ']', '{', '}', '#', '&', '*', '!', '|', '>', '\'', '"', '%', '@', '`', ' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029');
+alias isURIChar = among!('-', ';', '/', '?', ':', '@', '&', '=', '+', '$', ',', '_', '.', '!', '~', '*', '\'', '(', ')', '[', ']', '%');
+alias isNSChar = among!(' ', '\n', '\r', '\u0085', '\u2028', '\u2029');
+alias isBChar = among!('\n', '\r', '\u0085', '\u2028', '\u2029');
+alias isFlowScalarBreakSpace = among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029', '\'', '"', '\\');
 /// Marked exception thrown at scanner errors.
 ///
@ -153,14 +167,6 @@ final class Scanner
        /// not.
        char[256] msgBuffer_;
-        /// Used to detect if a character is any whitespace plus '\0'
-        mixin FastCharSearch!" \t\0\n\r\u0085\u2028\u2029"d searchAllWhitespace;
-        /// Used to detect if a character is any line break plus '\0'
-        mixin FastCharSearch!"\0\n\r\u0085\u2028\u2029"d searchAllBreaks;
-        /// Avoids compiler confusion of std.algorithm.canFind with FastCharSearch.
-        alias canFind = std.algorithm.canFind;
    public:
        /// Construct a Scanner using specified Reader.
        this(Reader reader) @safe nothrow
@ -750,7 +756,7 @@ final class Scanner
            return reader_.column     == 0     &&
                   reader_.peekByte() == '-'   &&
                   reader_.prefix(3)  == "---" &&
-                   searchAllWhitespace.canFind(reader_.peek(3));
+                   reader_.peek(3).isWhiteSpace;
        }
        /// Check if the next token is DOCUMENT-END:     ^ '...' (' '|'\n')
@ -760,13 +766,13 @@ final class Scanner
            return reader_.column     == 0     &&
                   reader_.peekByte() == '.'   &&
                   reader_.prefix(3)  == "..." &&
-                   searchAllWhitespace.canFind(reader_.peek(3));
+                   reader_.peek(3).isWhiteSpace;
        }
        /// Check if the next token is BLOCK-ENTRY:      '-' (' '|'\n')
        bool checkBlockEntry() @safe
        {
-            return searchAllWhitespace.canFind(reader_.peek(1));
+            return !!reader_.peek(1).isWhiteSpace;
        }
        /// Check if the next token is KEY(flow context):    '?'
@ -774,7 +780,7 @@ final class Scanner
        /// or KEY(block context):   '?' (' '|'\n')
        bool checkKey() @safe
        {
-            return (flowLevel_ > 0 || searchAllWhitespace.canFind(reader_.peek(1)));
+            return (flowLevel_ > 0 || reader_.peek(1).isWhiteSpace);
        }
        /// Check if the next token is VALUE(flow context):  ':'
@ -782,7 +788,7 @@ final class Scanner
        /// or VALUE(block context): ':' (' '|'\n')
        bool checkValue() @safe
        {
-            return flowLevel_ > 0 || searchAllWhitespace.canFind(reader_.peek(1));
+            return flowLevel_ > 0 || reader_.peek(1).isWhiteSpace;
        }
        /// Check if the next token is a plain scalar.
@ -802,13 +808,11 @@ final class Scanner
        bool checkPlain() @safe
        {
            const c = reader_.peek();
-            mixin FastCharSearch!"-?:,[]{}#&*!|>\'\"%@` \t\0\n\r\u0085\u2028\u2029"d
+            if(!c.isNonScalarStartCharacter)
-                searchPlainNotFirstChar;
-            if(!searchPlainNotFirstChar.canFind(c))
            {
                return true;
            }
-            return !searchAllWhitespace.canFind(reader_.peek(1)) &&
+            return !reader_.peek(1).isWhiteSpace &&
                   (c == '-' || (flowLevel_ == 0 && (c == '?' || c == ':')));
        }
@ -844,7 +848,7 @@ final class Scanner
        /// Scan and throw away all characters until next line break.
        void scanToNextBreak() @safe
        {
-            while(!searchAllBreaks.canFind(reader_.peek())) { reader_.forward(); }
+            while(!reader_.peek().isBreak) { reader_.forward(); }
        }
        /// Scan all characters until next line break.
@ -854,7 +858,7 @@ final class Scanner
        void scanToNextBreakToSlice() @safe
        {
            uint length = 0;
-            while(!searchAllBreaks.canFind(reader_.peek(length)))
+            while(!reader_.peek(length).isBreak)
            {
                ++length;
            }
@ -1068,7 +1072,7 @@ final class Scanner
        {
            findNextNonSpace();
            if(reader_.peekByte() == '#') { scanToNextBreak(); }
-            if(searchAllBreaks.canFind(reader_.peek()))
+            if(reader_.peek().isBreak)
            {
                scanLineBreak();
                return;
@ -1102,7 +1106,7 @@ final class Scanner
            char[] value = reader_.sliceBuilder.finish();
            if(error_)   { return Token.init; }
-            if(!searchAllWhitespace.canFind(reader_.peek()) &&
+            if(!reader_.peek().isWhiteSpace &&
               !"?:,]}%@"d.canFind(reader_.peekByte()))
            {
                enum anchorCtx = "While scanning an anchor";
@ -1137,7 +1141,6 @@ final class Scanner
            // (slice) we will produce.
            uint handleEnd;
-            mixin FastCharSearch!" \0\n\r\u0085\u2028\u2029"d search;
            if(c == '<')
            {
                reader_.forward(2);
@ -1153,7 +1156,7 @@ final class Scanner
                }
                reader_.forward();
            }
-            else if(searchAllWhitespace.canFind(c))
+            else if(c.isWhiteSpace)
            {
                reader_.forward();
                handleEnd = 0;
@ -1164,7 +1167,7 @@ final class Scanner
                uint length = 1;
                bool useHandle = false;
-                while(!search.canFind(c))
+                while(!c.isBreakOrSpace)
                {
                    if(c == '!')
                    {
@ -1192,7 +1195,7 @@ final class Scanner
                if(error_) { return Token.init; }
            }
-            if(search.canFind(reader_.peek()))
+            if(reader_.peek().isBreakOrSpace)
            {
                char[] slice = reader_.sliceBuilder.finish();
                return tagToken(startMark, reader_.mark, slice, handleEnd);
@ -1431,7 +1434,7 @@ final class Scanner
            findNextNonSpace();
            if(reader_.peekByte()== '#') { scanToNextBreak(); }
-            if(searchAllBreaks.canFind(reader_.peek()))
+            if(reader_.peek().isBreak)
            {
                scanLineBreak();
                return;
@ -1523,8 +1526,6 @@ final class Scanner
            {
                dchar c = reader_.peek();
-                mixin FastCharSearch!" \t\0\n\r\u0085\u2028\u2029\'\"\\"d search;
                size_t numCodePoints = 0;
                // This is an optimized way of writing:
                // while(!search.canFind(reader_.peek(numCodePoints))) { ++numCodePoints; }
@ -1543,7 +1544,7 @@ final class Scanner
                    {
                        // slice is UTF-8 - need to decode
                        const ch = slice[i] < 0x80 ? slice[i++] : decode(slice, i);
-                        if(search.canFind(ch)) { break outer; }
+                        if(ch.isFlowScalarBreakSpace) { break outer; }
                        ++numCodePoints;
                    }
                    oldSliceLength = slice.length;
@ -1686,7 +1687,7 @@ final class Scanner
                // Instead of checking indentation, we check for document separators.
                const prefix = reader_.prefix(3);
                if((prefix == "---" || prefix == "...") &&
-                   searchAllWhitespace.canFind(reader_.peek(3)))
+                   reader_.peek(3).isWhiteSpace)
                {
                    error("While scanning a quoted scalar", startMark,
                          "found unexpected document separator", reader_.mark);
@ -1738,8 +1739,8 @@ final class Scanner
                    for(;;)
                    {
                        const cNext = reader_.peek(length + 1);
-                        if(searchAllWhitespace.canFind(c) ||
+                        if(c.isWhiteSpace ||
-                           (c == ':' && searchAllWhitespace.canFind(cNext)))
+                           (c == ':' && cNext.isWhiteSpace))
                        {
                            break;
                        }
@ -1752,7 +1753,7 @@ final class Scanner
                    for(;;)
                    {
                        c = reader_.peek(length);
-                        if(searchAllWhitespace.canFind(c) || ",:?[]{}"d.canFind(c))
+                        if(c.isWhiteSpace || ",:?[]{}"d.canFind(c))
                        {
                            break;
                        }
@ -1762,7 +1763,7 @@ final class Scanner
                // It's not clear what we should do with ':' in the flow context.
                if(flowLevel_ > 0 && c == ':' &&
-                   !searchAllWhitespace.canFind(reader_.peek(length + 1)) &&
+                   !reader_.peek(length + 1).isWhiteSpace &&
                   !",[]{}"d.canFind(reader_.peek(length + 1)))
                {
                    // This is an error; throw the slice away.
@ -1818,10 +1819,7 @@ final class Scanner
            reader_.forward(length);
            dchar c = reader_.peek();
-            mixin FastCharSearch!" \n\r\u0085\u2028\u2029"d search;
+            if(!c.isNSChar)
-            // No newline after the spaces (if any)
-            // (Excluding ' ' so we can use the same FastCharSearch as below)
-            if(!search.canFind(c) && c != ' ')
            {
                // We have spaces, but no newline.
                if(whitespaces.length > 0) { reader_.sliceBuilder.write(whitespaces); }
@ -1846,7 +1844,7 @@ final class Scanner
            alias Transaction = SliceBuilder.Transaction;
            auto transaction = Transaction(&reader_.sliceBuilder);
            if(lineBreak != '\n') { reader_.sliceBuilder.write(lineBreak); }
-            while(search.canFind(reader_.peek()))
+            while(reader_.peek().isNSChar)
            {
                if(reader_.peekByte() == ' ') { reader_.forward(); }
                else
@ -1914,8 +1912,7 @@ final class Scanner
            const startLen = reader_.sliceBuilder.length;
            {
                uint length = 0;
-                mixin FastCharSearch!"-;/?:@&=+$,_.!~*\'()[]%"d search;
+                while(c.isAlphaNum || c.isURIChar)
-                while(c.isAlphaNum || search.canFind(c))
                {
                    if(c == '%')
                    {