UTFDecoder now works directly with a buffer, not a stream.
This commit is contained in:
parent
7d4df103a7
commit
e30f6e0e80
|
@ -401,10 +401,10 @@ struct UTFBlockDecoder(size_t bufferSize_) if (bufferSize_ % 2 == 0)
|
||||||
UTFEncoding encoding_;
|
UTFEncoding encoding_;
|
||||||
// Maximum number of characters that might be in the stream.
|
// Maximum number of characters that might be in the input buffer.
|
||||||
size_t maxChars_;
|
size_t maxChars_;
|
||||||
// Bytes available in the stream.
|
// The entire input buffer.
|
||||||
size_t available_;
|
ubyte[] inputAll_;
|
||||||
// Input stream.
|
// Part of the input buffer that has not yet been decoded.
|
||||||
MemoryStream stream_;
|
ubyte[] input_;
|
||||||
|
|
||||||
// Buffer used to store raw UTF-8 or UTF-16 code points.
|
// Buffer used to store raw UTF-8 or UTF-16 code points.
|
||||||
union
|
union
|
||||||
|
@ -415,23 +415,23 @@ struct UTFBlockDecoder(size_t bufferSize_) if (bufferSize_ % 2 == 0)
|
||||||
// Used space (in items) in rawBuffer8_/rawBuffer16_.
|
// Used space (in items) in rawBuffer8_/rawBuffer16_.
|
||||||
size_t rawUsed_;
|
size_t rawUsed_;
|
||||||
|
|
||||||
// Space used by decodedBuffer_.
|
// Space used by decoded_.
|
||||||
dchar[bufferSize_] decodedBufferSpace_;
|
dchar[bufferSize_] decodedSpace_;
|
||||||
// Buffer of decoded, UTF-32 characters. This is a slice into decodedBufferSpace_.
|
// Buffer of decoded, UTF-32 characters. This is a slice into decodedSpace_.
|
||||||
dchar[] decodedBuffer_;
|
dchar[] decoded_;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
/// Construct a UTFBlockDecoder decoding data from a buffer.
|
/// Construct a UTFBlockDecoder decoding data from a buffer.
|
||||||
this(ubyte[] buffer, UTFEncoding encoding) @trusted
|
this(ubyte[] buffer, UTFEncoding encoding) @trusted
|
||||||
{
|
{
|
||||||
stream_ = new MemoryStream(buffer);
|
inputAll_ = buffer;
|
||||||
available_ = stream_.available;
|
input_ = inputAll_[];
|
||||||
encoding_ = encoding;
|
encoding_ = encoding;
|
||||||
final switch(encoding_)
|
final switch(encoding_)
|
||||||
{
|
{
|
||||||
case UTFEncoding.UTF_8: maxChars_ = available_; break;
|
case UTFEncoding.UTF_8: maxChars_ = input_.length; break;
|
||||||
case UTFEncoding.UTF_16: maxChars_ = available_ / 2; break;
|
case UTFEncoding.UTF_16: maxChars_ = input_.length / 2; break;
|
||||||
case UTFEncoding.UTF_32: maxChars_ = available_ / 2; break;
|
case UTFEncoding.UTF_32: maxChars_ = input_.length / 2; break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -441,27 +441,27 @@ struct UTFBlockDecoder(size_t bufferSize_) if (bufferSize_ % 2 == 0)
|
||||||
/// Get encoding we're decoding from.
|
/// Get encoding we're decoding from.
|
||||||
UTFEncoding encoding() const pure @safe nothrow @nogc { return encoding_; }
|
UTFEncoding encoding() const pure @safe nothrow @nogc { return encoding_; }
|
||||||
|
|
||||||
/// Get the current position in stream.
|
/// Get the current position in the buffer.
|
||||||
size_t position() @trusted { return stream_.position; }
|
size_t position() @trusted { return inputAll_.length - input_.length; }
|
||||||
|
|
||||||
/// Are we done decoding?
|
/// Are we done decoding?
|
||||||
bool done() const pure @safe nothrow @nogc
|
bool done() const pure @safe nothrow @nogc
|
||||||
{
|
{
|
||||||
return rawUsed_ == 0 && decodedBuffer_.length == 0 && available_ == 0;
|
return rawUsed_ == 0 && decoded_.length == 0 && input_.length == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get next character.
|
/// Get next character.
|
||||||
dchar getDChar()
|
dchar getDChar()
|
||||||
@safe
|
@safe
|
||||||
{
|
{
|
||||||
if(decodedBuffer_.length)
|
if(decoded_.length)
|
||||||
{
|
{
|
||||||
const result = decodedBuffer_[0];
|
const result = decoded_[0];
|
||||||
decodedBuffer_ = decodedBuffer_[1 .. $];
|
decoded_ = decoded_[1 .. $];
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(available_ > 0 || rawUsed_ > 0);
|
assert(input_.length > 0 || rawUsed_ > 0);
|
||||||
updateBuffer();
|
updateBuffer();
|
||||||
return getDChar();
|
return getDChar();
|
||||||
}
|
}
|
||||||
|
@ -470,15 +470,15 @@ struct UTFBlockDecoder(size_t bufferSize_) if (bufferSize_ % 2 == 0)
|
||||||
const(dchar[]) getDChars(size_t maxChars = size_t.max)
|
const(dchar[]) getDChars(size_t maxChars = size_t.max)
|
||||||
@safe
|
@safe
|
||||||
{
|
{
|
||||||
if(decodedBuffer_.length)
|
if(decoded_.length)
|
||||||
{
|
{
|
||||||
const slice = min(decodedBuffer_.length, maxChars);
|
const slice = min(decoded_.length, maxChars);
|
||||||
const result = decodedBuffer_[0 .. slice];
|
const result = decoded_[0 .. slice];
|
||||||
decodedBuffer_ = decodedBuffer_[slice .. $];
|
decoded_ = decoded_[slice .. $];
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(available_ > 0 || rawUsed_ > 0);
|
assert(input_.length > 0 || rawUsed_ > 0);
|
||||||
updateBuffer();
|
updateBuffer();
|
||||||
return getDChars(maxChars);
|
return getDChars(maxChars);
|
||||||
}
|
}
|
||||||
|
@ -487,37 +487,37 @@ struct UTFBlockDecoder(size_t bufferSize_) if (bufferSize_ % 2 == 0)
|
||||||
// Read and decode characters from file and store them in the buffer.
|
// Read and decode characters from the input buffer and store them in the decoded buffer.
|
||||||
void updateBuffer() @trusted
|
void updateBuffer() @trusted
|
||||||
{
|
{
|
||||||
assert(decodedBuffer_.length == 0,
|
assert(decoded_.length == 0,
|
||||||
"updateBuffer can only be called when the buffer is empty");
|
"updateBuffer can only be called when the buffer is empty");
|
||||||
final switch(encoding_)
|
final switch(encoding_)
|
||||||
{
|
{
|
||||||
case UTFEncoding.UTF_8:
|
case UTFEncoding.UTF_8:
|
||||||
const bytes = min(bufferSize_ - rawUsed_, available_);
|
const bytes = min(bufferSize_ - rawUsed_, input_.length);
|
||||||
// Current length of valid data in rawBuffer8_.
|
// Current length of valid data in rawBuffer8_.
|
||||||
const rawLength = rawUsed_ + bytes;
|
const rawLength = rawUsed_ + bytes;
|
||||||
stream_.readExact(rawBuffer8_.ptr + rawUsed_, bytes);
|
rawBuffer8_[rawUsed_ .. rawUsed_ + bytes] = cast(char[])input_[0 .. bytes];
|
||||||
available_ -= bytes;
|
input_ = input_[bytes .. $];
|
||||||
decodeRawBuffer(rawBuffer8_, rawLength);
|
decodeRawBuffer(rawBuffer8_, rawLength);
|
||||||
break;
|
break;
|
||||||
case UTFEncoding.UTF_16:
|
case UTFEncoding.UTF_16:
|
||||||
const words = min((bufferSize_ / 2) - rawUsed_, available_ / 2);
|
const words = min((bufferSize_ / 2) - rawUsed_, input_.length / 2);
|
||||||
// Current length of valid data in rawBuffer16_.
|
// Current length of valid data in rawBuffer16_.
|
||||||
const rawLength = rawUsed_ + words;
|
const rawLength = rawUsed_ + words;
|
||||||
foreach(c; rawUsed_ .. rawLength)
|
foreach(c; rawUsed_ .. rawLength)
|
||||||
{
|
{
|
||||||
stream_.read(rawBuffer16_[c]);
|
rawBuffer16_[c] = *cast(wchar*)input_.ptr;
|
||||||
available_ -= 2;
|
input_ = input_[2 .. $];
|
||||||
}
|
}
|
||||||
decodeRawBuffer(rawBuffer16_, rawLength);
|
decodeRawBuffer(rawBuffer16_, rawLength);
|
||||||
break;
|
break;
|
||||||
case UTFEncoding.UTF_32:
|
case UTFEncoding.UTF_32:
|
||||||
const chars = min(bufferSize_ / 4, available_ / 4);
|
const chars = min(bufferSize_ / 4, input_.length / 4);
|
||||||
foreach(c; 0 .. chars)
|
foreach(c; 0 .. chars)
|
||||||
{
|
{
|
||||||
stream_.read(decodedBufferSpace_[c]);
|
decodedSpace_[c] = *cast(dchar*)input_.ptr;
|
||||||
available_ -= 4;
|
input_ = input_[4 .. $];
|
||||||
}
|
}
|
||||||
decodedBuffer_ = decodedBufferSpace_[0 .. chars];
|
decoded_ = decodedSpace_[0 .. chars];
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -584,15 +584,15 @@ struct UTFBlockDecoder(size_t bufferSize_) if (bufferSize_ % 2 == 0)
|
||||||
const c = source[srcpos];
|
const c = source[srcpos];
|
||||||
if(c < 0x80)
|
if(c < 0x80)
|
||||||
{
|
{
|
||||||
decodedBufferSpace_[bufpos++] = c;
|
decodedSpace_[bufpos++] = c;
|
||||||
++srcpos;
|
++srcpos;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
decodedBufferSpace_[bufpos++] = decode(source, srcpos);
|
decodedSpace_[bufpos++] = decode(source, srcpos);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
decodedBuffer_ = decodedBufferSpace_[0 .. bufpos];
|
decoded_ = decodedSpace_[0 .. bufpos];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue