Merge pull request #182 from vibe-d/read_file_utf8_opt

Read file UTF-8 optimization
This commit is contained in:
Leonid Kramer 2019-10-25 20:58:05 +02:00 committed by GitHub
commit 8aae13b3c2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 16 additions and 18 deletions

View file

@ -118,7 +118,9 @@ string readFileUTF8(NativePath path)
{ {
import vibe.internal.string; import vibe.internal.string;
return stripUTF8Bom(sanitizeUTF8(readFile(path))); auto data = readFile(path);
auto idata = () @trusted { return data.assumeUnique; } ();
return stripUTF8Bom(sanitizeUTF8(idata));
} }
/// ditto /// ditto
string readFileUTF8(string path) string readFileUTF8(string path)

View file

@ -25,24 +25,20 @@ import core.exception;
Takes a string with possibly invalid UTF8 sequences and outputs a valid UTF8 string as near to Takes a string with possibly invalid UTF8 sequences and outputs a valid UTF8 string as near to
the original as possible. the original as possible.
*/ */
string sanitizeUTF8(in ubyte[] str) string sanitizeUTF8(immutable(ubyte)[] str)
@safe pure { @safe pure {
import std.utf; import std.encoding : sanitize;
auto ret = appender!string(); auto ustr = cast(immutable(char)[])str;
ret.reserve(str.length); return () @trusted { return sanitize(ustr); } ();
size_t i = 0;
while (i < str.length) {
dchar ch = str[i];
try ch = std.utf.decode(cast(const(char[]))str, i);
catch( UTFException ){ i++; }
//catch( AssertError ){ i++; }
char[4] dst;
auto len = std.utf.encode(dst, ch);
ret.put(dst[0 .. len]);
} }
/// ditto
return ret.data; string sanitizeUTF8(in ubyte[] str)
@trusted pure {
import std.encoding : sanitize;
auto ustr = cast(immutable(char)[])str;
auto ret = sanitize(ustr);
if (ret.ptr is ustr.ptr) return ustr.idup;
else return ret;
} }
/** /**