Only validate UTF-8 if we get UTF-8 input (UTF-16/32 validated at conversion)
This commit is contained in:
parent
6c15bd95cc
commit
4e2c3e6093
|
@ -121,14 +121,10 @@ final class Reader
|
||||||
|
|
||||||
buffer8_ = utf8Result.utf8;
|
buffer8_ = utf8Result.utf8;
|
||||||
|
|
||||||
const validateResult = buffer8_.validateUTF8NoGC;
|
characterCount_ = utf8Result.characterCount;
|
||||||
enforce(validateResult.valid,
|
|
||||||
new ReaderException(validateResult.msg ~
|
|
||||||
validateResult.sequence.to!string));
|
|
||||||
// Check that all characters in buffer are printable.
|
// Check that all characters in buffer are printable.
|
||||||
enforce(isPrintableValidUTF8(buffer8_),
|
enforce(isPrintableValidUTF8(buffer8_),
|
||||||
new ReaderException("Special unicode characters are not allowed"));
|
new ReaderException("Special unicode characters are not allowed"));
|
||||||
characterCount_ = validateResult.characterCount;
|
|
||||||
|
|
||||||
this.sliceBuilder = SliceBuilder(this);
|
this.sliceBuilder = SliceBuilder(this);
|
||||||
}
|
}
|
||||||
|
@ -617,9 +613,10 @@ private:
|
||||||
// A struct with the following members:
|
// A struct with the following members:
|
||||||
//
|
//
|
||||||
// $(D string errorMessage) In case of an error, the error message is stored here. If
|
// $(D string errorMessage) In case of an error, the error message is stored here. If
|
||||||
// there was no error, errorMessage is NULL. Always check this
|
// there was no error, errorMessage is NULL. Always check
|
||||||
// first.
|
// this first.
|
||||||
// $(D char[] utf8) input converted to UTF-8. May be a slice of input.
|
// $(D char[] utf8) input converted to UTF-8. May be a slice of input.
|
||||||
|
// $(D size_t characterCount) Number of characters (code points) in input.
|
||||||
auto toUTF8(ubyte[] input, const UTFEncoding encoding) @safe pure nothrow
|
auto toUTF8(ubyte[] input, const UTFEncoding encoding) @safe pure nothrow
|
||||||
{
|
{
|
||||||
// Documented in function ddoc.
|
// Documented in function ddoc.
|
||||||
|
@ -627,6 +624,7 @@ auto toUTF8(ubyte[] input, const UTFEncoding encoding) @safe pure nothrow
|
||||||
{
|
{
|
||||||
string errorMessage;
|
string errorMessage;
|
||||||
char[] utf8;
|
char[] utf8;
|
||||||
|
size_t characterCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
Result result;
|
Result result;
|
||||||
|
@ -639,9 +637,7 @@ auto toUTF8(ubyte[] input, const UTFEncoding encoding) @safe pure nothrow
|
||||||
// result = A Result struct to put encoded result and any error messages to.
|
// result = A Result struct to put encoded result and any error messages to.
|
||||||
//
|
//
|
||||||
// On error, result.errorMessage will be set.
|
// On error, result.errorMessage will be set.
|
||||||
static void encode(C)(C[] input, ref Result result) @safe pure nothrow
|
static void encode(C)(C[] input, ref Result result) @safe pure
|
||||||
{
|
|
||||||
try
|
|
||||||
{
|
{
|
||||||
// We can do UTF-32->UTF-8 in place because all UTF-8 sequences are 4 or
|
// We can do UTF-32->UTF-8 in place because all UTF-8 sequences are 4 or
|
||||||
// less bytes.
|
// less bytes.
|
||||||
|
@ -652,6 +648,7 @@ auto toUTF8(ubyte[] input, const UTFEncoding encoding) @safe pure nothrow
|
||||||
auto length = 0;
|
auto length = 0;
|
||||||
foreach(dchar c; input)
|
foreach(dchar c; input)
|
||||||
{
|
{
|
||||||
|
++result.characterCount;
|
||||||
// ASCII
|
// ASCII
|
||||||
if(c < 0x80)
|
if(c < 0x80)
|
||||||
{
|
{
|
||||||
|
@ -674,20 +671,23 @@ auto toUTF8(ubyte[] input, const UTFEncoding encoding) @safe pure nothrow
|
||||||
// Unfortunately we can't do UTF-16 in place so we just use std.conv.to
|
// Unfortunately we can't do UTF-16 in place so we just use std.conv.to
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
result.characterCount = std.utf.count(input);
|
||||||
result.utf8 = input.to!(char[]);
|
result.utf8 = input.to!(char[]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch(ConvException e) { result.errorMessage = e.msg; }
|
|
||||||
catch(UTFException e) { result.errorMessage = e.msg; }
|
|
||||||
catch(Exception e)
|
|
||||||
{
|
|
||||||
assert(false, "Unexpected exception in encode(): " ~ e.msg);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
final switch(encoding)
|
try final switch(encoding)
|
||||||
{
|
{
|
||||||
case UTFEncoding.UTF_8: result.utf8 = cast(char[])input; break;
|
case UTFEncoding.UTF_8:
|
||||||
|
result.utf8 = cast(char[])input;
|
||||||
|
const validateResult = result.utf8.validateUTF8NoGC();
|
||||||
|
if(!validateResult.valid)
|
||||||
|
{
|
||||||
|
result.errorMessage = "UTF-8 validation error: " ~ validateResult.msg ~
|
||||||
|
validateResult.sequence.to!string;
|
||||||
|
}
|
||||||
|
result.characterCount = validateResult.characterCount;
|
||||||
|
break;
|
||||||
case UTFEncoding.UTF_16:
|
case UTFEncoding.UTF_16:
|
||||||
assert(input.length % 2 == 0, "UTF-16 buffer size must be even");
|
assert(input.length % 2 == 0, "UTF-16 buffer size must be even");
|
||||||
encode(cast(wchar[])input, result);
|
encode(cast(wchar[])input, result);
|
||||||
|
@ -697,8 +697,12 @@ auto toUTF8(ubyte[] input, const UTFEncoding encoding) @safe pure nothrow
|
||||||
encode(cast(dchar[])input, result);
|
encode(cast(dchar[])input, result);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
catch(ConvException e) { result.errorMessage = e.msg; }
|
||||||
if(result.errorMessage !is null) { return result; }
|
catch(UTFException e) { result.errorMessage = e.msg; }
|
||||||
|
catch(Exception e)
|
||||||
|
{
|
||||||
|
assert(false, "Unexpected exception in encode(): " ~ e.msg);
|
||||||
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue