Merge pull request #182 from vibe-d/read_file_utf8_opt

Read file UTF-8 optimization
This commit is contained in:
Leonid Kramer 2019-10-25 20:58:05 +02:00 committed by GitHub
commit 8aae13b3c2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 16 additions and 18 deletions

View file

@ -118,7 +118,9 @@ string readFileUTF8(NativePath path)
{ {
import vibe.internal.string; import vibe.internal.string;
return stripUTF8Bom(sanitizeUTF8(readFile(path))); auto data = readFile(path);
auto idata = () @trusted { return data.assumeUnique; } ();
return stripUTF8Bom(sanitizeUTF8(idata));
} }
/// ditto /// ditto
string readFileUTF8(string path) string readFileUTF8(string path)

View file

@ -25,24 +25,20 @@ import core.exception;
Takes a string with possibly invalid UTF8 sequences and outputs a valid UTF8 string as near to Takes a string with possibly invalid UTF8 sequences and outputs a valid UTF8 string as near to
the original as possible. the original as possible.
*/ */
string sanitizeUTF8(in ubyte[] str) string sanitizeUTF8(immutable(ubyte)[] str)
@safe pure { @safe pure {
import std.utf; import std.encoding : sanitize;
auto ret = appender!string(); auto ustr = cast(immutable(char)[])str;
ret.reserve(str.length); return () @trusted { return sanitize(ustr); } ();
}
size_t i = 0; /// ditto
while (i < str.length) { string sanitizeUTF8(in ubyte[] str)
dchar ch = str[i]; @trusted pure {
try ch = std.utf.decode(cast(const(char[]))str, i); import std.encoding : sanitize;
catch( UTFException ){ i++; } auto ustr = cast(immutable(char)[])str;
//catch( AssertError ){ i++; } auto ret = sanitize(ustr);
char[4] dst; if (ret.ptr is ustr.ptr) return ustr.idup;
auto len = std.utf.encode(dst, ch); else return ret;
ret.put(dst[0 .. len]);
}
return ret.data;
} }
/** /**