Function to count the number of ASCII characters in a string before the first non-ASCII (multi-byte UTF-8) sequence
This commit is contained in:
parent
8902ea8806
commit
1c0702f3cd
|
@ -758,6 +758,46 @@ bool isPrintableValidUTF8(const char[] chars) @safe pure nothrow @nogc
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Counts the ASCII characters at the start of buffer, stopping at the first
/// byte that belongs to a multi-byte UTF-8 sequence (any byte >= 0x80).
///
/// Used to determine how many characters we can process without decoding.
///
/// Params:  buffer = UTF-8 text to scan. May be empty.
///
/// Returns: Number of leading bytes in buffer that are all below 0x80;
///          buffer.length if the whole buffer is ASCII.
size_t countASCII(const(char)[] buffer) @trusted pure nothrow @nogc
{
    // The topmost bit of every byte; it is set only in non-ASCII bytes.
    enum ulong HighBits = 0x8080808080808080;

    size_t count = 0;

    // Start by checking in 8-byte chunks.
    while(buffer.length - count >= ulong.sizeof)
    {
        // Copy the chunk into an aligned local rather than dereferencing a
        // cast pointer: buffer.ptr carries no alignment guarantee (buffer may
        // be an arbitrary sub-slice), and misaligned loads are not portable.
        ulong block = void;
        (cast(char*)&block)[0 .. ulong.sizeof] = buffer[count .. count + ulong.sizeof];

        // Byte order is irrelevant here: we only ask whether any top bit is set.
        if((block & HighBits) != 0) { break; }
        count += ulong.sizeof;
    }

    // Fewer than 8 ASCII bytes can remain before the first non-ASCII byte or
    // the end of the buffer, so finish one byte at a time.
    while(count < buffer.length && buffer[count] < 0x80) { ++count; }

    return count;
}
|
||||||
// Unittests.
|
// Unittests.
|
||||||
|
|
||||||
void testEndian(R)()
|
void testEndian(R)()
|
||||||
|
|
Loading…
Reference in a new issue