Add a function to count the number of ASCII characters in a string before the first non-ASCII UTF-8 sequence
This commit is contained in:
parent
8902ea8806
commit
1c0702f3cd
|
@ -758,6 +758,46 @@ bool isPrintableValidUTF8(const char[] chars) @safe pure nothrow @nogc
|
|||
return true;
|
||||
}
|
||||
|
||||
/// Returns the length of the leading run of ASCII characters in buffer, i.e. how
/// many characters precede the first byte that belongs to a non-ASCII UTF-8 sequence.
///
/// Callers use the result to know how many characters can be processed as-is,
/// without any UTF-8 decoding.
///
/// Params:  buffer = Characters to scan, starting at the first character.
///
/// Returns: Number of ASCII characters counted from the start of buffer
///          (0 for an empty buffer or one that starts with a non-ASCII byte).
size_t countASCII(const(char)[] buffer) @trusted pure nothrow @nogc
{
    // An ASCII byte never has its top bit set, so AND-ing a chunk with 0x7f in
    // every byte position leaves the chunk unchanged only if all of it is ASCII.
    enum ulong  asciiBits8 = 0x7f7f7f7f7f7f7f7f;
    enum uint   asciiBits4 = 0x7f7f7f7f;
    enum ushort asciiBits2 = 0x7f7f;

    size_t asciiCount = 0;

    // Consume 8-byte chunks for as long as each one is pure ASCII.
    // NOTE: chunks are read directly through a pointer cast; no alignment
    // check is performed on buffer.ptr.
    while(buffer.length >= ulong.sizeof)
    {
        const chunk = *cast(const(ulong)*)buffer.ptr;
        if((chunk & asciiBits8) != chunk) { break; }
        asciiCount += ulong.sizeof;
        buffer      = buffer[ulong.sizeof .. $];
    }

    // Fewer than 8 ASCII bytes can remain at this point: try one 4-byte chunk...
    if(buffer.length >= uint.sizeof)
    {
        const chunk = *cast(const(uint)*)buffer.ptr;
        if((chunk & asciiBits4) == chunk)
        {
            asciiCount += uint.sizeof;
            buffer      = buffer[uint.sizeof .. $];
        }
    }

    // ...then one 2-byte chunk (attempted whether or not the 4-byte step matched)...
    if(buffer.length >= ushort.sizeof)
    {
        const chunk = *cast(const(ushort)*)buffer.ptr;
        if((chunk & asciiBits2) == chunk)
        {
            asciiCount += ushort.sizeof;
            buffer      = buffer[ushort.sizeof .. $];
        }
    }

    // ...and finally a single byte.
    if(buffer.length > 0 && buffer[0] < 0x80)
    {
        ++asciiCount;
    }

    return asciiCount;
}
|
||||
// Unittests.
|
||||
|
||||
void testEndian(R)()
|
||||
|
|
Loading…
Reference in a new issue