From 08951119dfb53675fb3a1c6fc6dfd61278b239d8 Mon Sep 17 00:00:00 2001 From: Cameron Ross Date: Mon, 7 Jan 2019 22:07:31 -0400 Subject: [PATCH] fix UTF-8 decoding in URI sequences --- source/dyaml/scanner.d | 20 +++++++++++++++++--- source/dyaml/test/resolver.d | 14 +++++++++++++- test/data/uri.data | 3 +++ test/data/uri.detect | 1 + 4 files changed, 34 insertions(+), 4 deletions(-) create mode 100644 test/data/uri.data create mode 100644 test/data/uri.detect diff --git a/source/dyaml/scanner.d b/source/dyaml/scanner.d index dae67e3..589c0bb 100644 --- a/source/dyaml/scanner.d +++ b/source/dyaml/scanner.d @@ -1974,7 +1974,14 @@ final class Scanner } else { - c = decode(bytes[], nextChar); + try + { + c = decode(bytes[], nextChar); + } + catch (UTFException) + { + return size_t.max; + } } reader_.sliceBuilder.write(c); if(bytes.length - nextChar > 0) @@ -2016,8 +2023,8 @@ final class Scanner uint digit; if(c - '0' < 10) { digit = c - '0'; } - else if(c - 'A' < 6) { digit = c - 'A'; } - else if(c - 'a' < 6) { digit = c - 'a'; } + else if(c - 'A' < 6) { digit = 10 + c - 'A'; } + else if(c - 'a' < 6) { digit = 10 + c - 'a'; } else { assert(false); } b += mult * digit; mult /= 16; @@ -2028,6 +2035,13 @@ final class Scanner } bytesUsed = getDchar(bytes[0 .. bytesUsed], reader_); + if(bytesUsed == size_t.max) + { + error(contextMsg, startMark, + "Invalid UTF-8 data encoded in URI escape sequence", + reader_.mark); + return; + } } diff --git a/source/dyaml/test/resolver.d b/source/dyaml/test/resolver.d index ba4434e..f71fa3c 100644 --- a/source/dyaml/test/resolver.d +++ b/source/dyaml/test/resolver.d @@ -16,6 +16,12 @@ import std.string; import dyaml.test.common; +string construct(ref Node node) @safe +{ + return node.as!string; +} + + /** * Implicit tag resolution unittest. * @@ -37,7 +43,13 @@ void testImplicitResolver(string dataFilename, string detectFilename) @safe } correctTag = readText(detectFilename).strip(); - node = Loader.fromFile(dataFilename).load(); + + auto constructor = new Constructor; + constructor.addConstructorScalar("tag:example.com,2000:app/tag🤔", &construct); + auto loader = Loader.fromFile(dataFilename); + loader.constructor = constructor; + + node = loader.load(); assert(node.isSequence); foreach(ref Node scalar; node) { diff --git a/test/data/uri.data b/test/data/uri.data new file mode 100644 index 0000000..4532bd8 --- /dev/null +++ b/test/data/uri.data @@ -0,0 +1,3 @@ +%TAG !e! tag:example.com,2000:app/ +--- +- !e!tag%F0%9F%A4%94 baz diff --git a/test/data/uri.detect b/test/data/uri.detect new file mode 100644 index 0000000..981dd7f --- /dev/null +++ b/test/data/uri.detect @@ -0,0 +1 @@ +tag:example.com,2000:app/tag🤔