diff --git a/source/dyaml/reader.d b/source/dyaml/reader.d index 221438a..8e090c4 100644 --- a/source/dyaml/reader.d +++ b/source/dyaml/reader.d @@ -803,109 +803,10 @@ auto toUTF8(ubyte[] input, const UTFEncoding encoding) @safe pure nothrow /// Determine if all characters (code points, not bytes) in a string are printable. bool isPrintableValidUTF8(const char[] chars) @safe pure { - // This is oversized (only 128 entries are necessary) simply because having 256 - // entries improves performance... for some reason (alignment?) - bool[256] printable = [false, false, false, false, false, false, false, false, - false, true, true, false, false, true, false, false, - false, false, false, false, false, false, false, false, - false, false, false, false, false, false, false, false, - - true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, - - true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, - - false, false, false, false, false, false, false, false, - false, false, false, false, false, false, false, false, - false, false, false, false, false, false, false, false, - false, false, false, false, false, false, false, false, - false, false, false, false, false, false, false, false, - false, false, false, false, false, false, false, false, - false, false, false, false, false, false, false, false, - false, false, false, false, false, false, false, false, - - false, false, false, false, false, false, false, false, - false, false, false, false, false, false, false, false, - false, false, false, false, false, false, false, false, - false, false, false, false, false, false, false, false, - false, false, false, false, false, false, false, false, - false, false, false, false, false, false, false, false, - false, false, false, false, false, false, false, false, - false, false, false, false, false, false, false, false]; - - for(size_t index; index < chars.length;) + import std.uni : isControl, isWhite; + foreach (dchar chr; chars) { - // Fast path for ASCII. - // Both this while() block and the if() block below it are optimized, unrolled - // versions of the for() block below them; the while()/if() block could be - // removed without affecting logic, but both help increase performance. - size_t asciiCount = countASCII(chars[index .. $]); - // 8 ASCII iterations unrolled, looping while there are at most 8 ASCII chars. - while(asciiCount > 8) - { - const dchar b0 = chars[index]; - const dchar b1 = chars[index + 1]; - const dchar b2 = chars[index + 2]; - const dchar b3 = chars[index + 3]; - const dchar b4 = chars[index + 4]; - const dchar b5 = chars[index + 5]; - const dchar b6 = chars[index + 6]; - const dchar b7 = chars[index + 7]; - - index += 8; - asciiCount -= 8; - - const all = printable[b0] & printable[b1] & printable[b2] & printable[b3] & - printable[b4] & printable[b5] & printable[b6] & printable[b1]; - if(!all) - { - return false; - } - } - // 4 ASCII iterations unrolled - if(asciiCount > 4) - { - const char b0 = chars[index]; - const char b1 = chars[index + 1]; - const char b2 = chars[index + 2]; - const char b3 = chars[index + 3]; - - index += 4; - asciiCount -= 4; - - if(!printable[b0]) { return false; } - if(!printable[b1]) { return false; } - if(!printable[b2]) { return false; } - if(!printable[b3]) { return false; } - } - // Any remaining ASCII chars. This is really the only code needed to handle - // ASCII, the above if() and while() blocks are just an optimization. - for(; asciiCount > 0; --asciiCount) - { - const char b = chars[index]; - ++index; - if(b >= 0x20) { continue; } - if(printable[b]) { continue; } - return false; - } - - if(index == chars.length) { break; } - - // Not ASCII, need to decode. - const dchar c = decode(chars, index); - // We now c is not ASCII, so only check for printable non-ASCII chars. - if(!(c == 0x85 || (c >= 0xA0 && c <= '\uD7FF') || - (c >= '\uE000' && c <= '\uFFFD') || - (c >= '\U00010000' && c <= '\U0010FFFF'))) + if (!chr.isValidDchar || (chr.isControl && !chr.isWhite)) { return false; }