Only validate UTF-8 if we get UTF-8 input (UTF-16/32 validated at conversion)
This commit is contained in:
parent
6c15bd95cc
commit
4e2c3e6093
|
@ -121,14 +121,10 @@ final class Reader
|
|||
|
||||
buffer8_ = utf8Result.utf8;
|
||||
|
||||
const validateResult = buffer8_.validateUTF8NoGC;
|
||||
enforce(validateResult.valid,
|
||||
new ReaderException(validateResult.msg ~
|
||||
validateResult.sequence.to!string));
|
||||
characterCount_ = utf8Result.characterCount;
|
||||
// Check that all characters in buffer are printable.
|
||||
enforce(isPrintableValidUTF8(buffer8_),
|
||||
new ReaderException("Special unicode characters are not allowed"));
|
||||
characterCount_ = validateResult.characterCount;
|
||||
|
||||
this.sliceBuilder = SliceBuilder(this);
|
||||
}
|
||||
|
@ -616,10 +612,11 @@ private:
|
|||
//
|
||||
// A struct with the following members:
|
||||
//
|
||||
// $(D string errorMessage) In case of an error, the error message is stored here. If
|
||||
// there was no error, errorMessage is NULL. Always check this
|
||||
// first.
|
||||
// $(D char[] utf8) input converted to UTF-8. May be a slice of input.
|
||||
// $(D string errorMessage) In case of an error, the error message is stored here. If
|
||||
// there was no error, errorMessage is NULL. Always check
|
||||
// this first.
|
||||
// $(D char[] utf8) input converted to UTF-8. May be a slice of input.
|
||||
// $(D size_t characterCount) Number of characters (code points) in input.
|
||||
auto toUTF8(ubyte[] input, const UTFEncoding encoding) @safe pure nothrow
|
||||
{
|
||||
// Documented in function ddoc.
|
||||
|
@ -627,6 +624,7 @@ auto toUTF8(ubyte[] input, const UTFEncoding encoding) @safe pure nothrow
|
|||
{
|
||||
string errorMessage;
|
||||
char[] utf8;
|
||||
size_t characterCount;
|
||||
}
|
||||
|
||||
Result result;
|
||||
|
@ -639,55 +637,57 @@ auto toUTF8(ubyte[] input, const UTFEncoding encoding) @safe pure nothrow
|
|||
// result = A Result struct to put encoded result and any error messages to.
|
||||
//
|
||||
// On error, result.errorMessage will be set.
|
||||
static void encode(C)(C[] input, ref Result result) @safe pure nothrow
|
||||
static void encode(C)(C[] input, ref Result result) @safe pure
|
||||
{
|
||||
try
|
||||
// We can do UTF-32->UTF-8 in place because all UTF-8 sequences are 4 or
|
||||
// less bytes.
|
||||
static if(is(C == dchar))
|
||||
{
|
||||
// We can do UTF-32->UTF-8 in place because all UTF-8 sequences are 4 or
|
||||
// less bytes.
|
||||
static if(is(C == dchar))
|
||||
char[4] encodeBuf;
|
||||
auto utf8 = cast(char[])input;
|
||||
auto length = 0;
|
||||
foreach(dchar c; input)
|
||||
{
|
||||
char[4] encodeBuf;
|
||||
auto utf8 = cast(char[])input;
|
||||
auto length = 0;
|
||||
foreach(dchar c; input)
|
||||
++result.characterCount;
|
||||
// ASCII
|
||||
if(c < 0x80)
|
||||
{
|
||||
// ASCII
|
||||
if(c < 0x80)
|
||||
{
|
||||
utf8[length++] = cast(char)c;
|
||||
continue;
|
||||
}
|
||||
|
||||
const encodeResult = encodeCharNoGC!(No.validated)(encodeBuf, c);
|
||||
if(encodeResult.errorMessage !is null)
|
||||
{
|
||||
result.errorMessage = encodeResult.errorMessage;
|
||||
return;
|
||||
}
|
||||
const bytes = encodeResult.bytes;
|
||||
utf8[length .. length + bytes] = encodeBuf[0 .. bytes];
|
||||
length += bytes;
|
||||
utf8[length++] = cast(char)c;
|
||||
continue;
|
||||
}
|
||||
result.utf8 = utf8[0 .. length];
|
||||
}
|
||||
// Unfortunately we can't do UTF-16 in place so we just use std.conv.to
|
||||
else
|
||||
{
|
||||
result.utf8 = input.to!(char[]);
|
||||
|
||||
const encodeResult = encodeCharNoGC!(No.validated)(encodeBuf, c);
|
||||
if(encodeResult.errorMessage !is null)
|
||||
{
|
||||
result.errorMessage = encodeResult.errorMessage;
|
||||
return;
|
||||
}
|
||||
const bytes = encodeResult.bytes;
|
||||
utf8[length .. length + bytes] = encodeBuf[0 .. bytes];
|
||||
length += bytes;
|
||||
}
|
||||
result.utf8 = utf8[0 .. length];
|
||||
}
|
||||
catch(ConvException e) { result.errorMessage = e.msg; }
|
||||
catch(UTFException e) { result.errorMessage = e.msg; }
|
||||
catch(Exception e)
|
||||
// Unfortunately we can't do UTF-16 in place so we just use std.conv.to
|
||||
else
|
||||
{
|
||||
assert(false, "Unexpected exception in encode(): " ~ e.msg);
|
||||
result.characterCount = std.utf.count(input);
|
||||
result.utf8 = input.to!(char[]);
|
||||
}
|
||||
}
|
||||
|
||||
final switch(encoding)
|
||||
try final switch(encoding)
|
||||
{
|
||||
case UTFEncoding.UTF_8: result.utf8 = cast(char[])input; break;
|
||||
case UTFEncoding.UTF_8:
|
||||
result.utf8 = cast(char[])input;
|
||||
const validateResult = result.utf8.validateUTF8NoGC();
|
||||
if(!validateResult.valid)
|
||||
{
|
||||
result.errorMessage = "UTF-8 validation error: " ~ validateResult.msg ~
|
||||
validateResult.sequence.to!string;
|
||||
}
|
||||
result.characterCount = validateResult.characterCount;
|
||||
break;
|
||||
case UTFEncoding.UTF_16:
|
||||
assert(input.length % 2 == 0, "UTF-16 buffer size must be even");
|
||||
encode(cast(wchar[])input, result);
|
||||
|
@ -697,8 +697,12 @@ auto toUTF8(ubyte[] input, const UTFEncoding encoding) @safe pure nothrow
|
|||
encode(cast(dchar[])input, result);
|
||||
break;
|
||||
}
|
||||
|
||||
if(result.errorMessage !is null) { return result; }
|
||||
catch(ConvException e) { result.errorMessage = e.msg; }
|
||||
catch(UTFException e) { result.errorMessage = e.msg; }
|
||||
catch(Exception e)
|
||||
{
|
||||
assert(false, "Unexpected exception in encode(): " ~ e.msg);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue