fix UTF-8 decoding in URI sequences

This commit is contained in:
Cameron Ross 2019-01-07 22:07:31 -04:00
parent 3374dc6544
commit 08951119df
No known key found for this signature in database
GPG key ID: 777897D98DC91C54
4 changed files with 34 additions and 4 deletions

View file

@ -1974,7 +1974,14 @@ final class Scanner
}
else
{
c = decode(bytes[], nextChar);
try
{
c = decode(bytes[], nextChar);
}
catch (UTFException)
{
return size_t.max;
}
}
reader_.sliceBuilder.write(c);
if(bytes.length - nextChar > 0)
@ -2016,8 +2023,8 @@ final class Scanner
uint digit;
if(c - '0' < 10) { digit = c - '0'; }
else if(c - 'A' < 6) { digit = c - 'A'; }
else if(c - 'a' < 6) { digit = c - 'a'; }
else if(c - 'A' < 6) { digit = 10 + c - 'A'; }
else if(c - 'a' < 6) { digit = 10 + c - 'a'; }
else { assert(false); }
b += mult * digit;
mult /= 16;
@ -2028,6 +2035,13 @@ final class Scanner
}
bytesUsed = getDchar(bytes[0 .. bytesUsed], reader_);
if(bytesUsed == size_t.max)
{
error(contextMsg, startMark,
"Invalid UTF-8 data encoded in URI escape sequence",
reader_.mark);
return;
}
}

View file

@ -16,6 +16,12 @@ import std.string;
import dyaml.test.common;
/// Scalar constructor callback: returns the given node's value as a string.
/// Passed to Constructor.addConstructorScalar by the resolver test for its custom tag.
string construct(ref Node node) @safe
{
return node.as!string;
}
/**
* Implicit tag resolution unittest.
*
@ -37,7 +43,13 @@ void testImplicitResolver(string dataFilename, string detectFilename) @safe
}
correctTag = readText(detectFilename).strip();
node = Loader.fromFile(dataFilename).load();
auto constructor = new Constructor;
constructor.addConstructorScalar("tag:example.com,2000:app/tag🤔", &construct);
auto loader = Loader.fromFile(dataFilename);
loader.constructor = constructor;
node = loader.load();
assert(node.isSequence);
foreach(ref Node scalar; node)
{

3
test/data/uri.data Normal file
View file

@ -0,0 +1,3 @@
%TAG !e! tag:example.com,2000:app/
---
- !e!tag%F0%9F%A4%94 baz

1
test/data/uri.detect Normal file
View file

@ -0,0 +1 @@
tag:example.com,2000:app/tag🤔