Reader is now using MemoryStream, using tinyendian to fix endianness issues.

This commit is contained in:
Ferdinand Majerech 2014-07-22 01:09:27 +02:00
parent e9a18c62b0
commit 72afe53ab8

View file

@ -65,7 +65,7 @@ final class Reader
{
private:
// Input stream.
EndianStream stream_;
MemoryStream memStream_;
// Allocated space for buffer_.
dchar[] bufferAllocated_ = null;
// Buffer of currently loaded characters.
@ -81,6 +81,12 @@ final class Reader
// Decoder reading data from file and decoding it to UTF-32.
UTFFastDecoder decoder_;
version(unittest)
{
// Endianness of the input before it was converted (for testing)
Endian endian_;
}
public:
/// Construct a Reader.
///
@ -88,15 +94,18 @@ final class Reader
///
/// Throws: ReaderException if the stream is invalid.
this(Stream stream) @trusted //!nothrow
in
{
assert(stream.readable && stream.seekable,
"Can't read YAML from a stream that is not readable and seekable");
auto streamBytes = streamToBytesGC(stream);
auto result = fixUTFByteOrder(streamBytes);
if(result.bytesStripped > 0)
{
throw new ReaderException("Size of UTF-16 or UTF-32 input not aligned "
"to 2 or 4 bytes, respectively");
}
body
{
stream_ = new EndianStream(stream);
decoder_ = UTFFastDecoder(stream_);
version(unittest) { endian_ = result.endian; }
memStream_ = new MemoryStream(result.array);
decoder_ = UTFFastDecoder(memStream_, result.encoding);
}
@trusted nothrow @nogc ~this()
@ -294,7 +303,7 @@ final class Reader
void loadChars(size_t chars) @system
{
const oldLength = buffer_.length;
const oldPosition = stream_.position;
const oldPosition = memStream_.position;
bufferReserve(buffer_.length + chars);
buffer_ = bufferAllocated_[0 .. buffer_.length + chars];
@ -324,7 +333,7 @@ final class Reader
try{throw e;}
catch(UTFException e)
{
const position = stream_.position;
const position = memStream_.position;
throw new ReaderException(format("Unicode decoding error between bytes %s and %s : %s",
oldPosition, position, e.msg));
}
@ -398,7 +407,7 @@ struct UTFBlockDecoder(size_t bufferSize_) if (bufferSize_ % 2 == 0)
// Bytes available in the stream.
size_t available_;
// Input stream.
EndianStream stream_;
MemoryStream stream_;
// Buffer used to store raw UTF-8 or UTF-16 code points.
union
@ -416,59 +425,17 @@ struct UTFBlockDecoder(size_t bufferSize_) if (bufferSize_ % 2 == 0)
public:
/// Construct a UTFBlockDecoder decoding a stream.
this(EndianStream stream) @trusted //!nothrow
this(MemoryStream stream, UTFEncoding encoding) @trusted
{
stream_ = stream;
available_ = stream_.available;
//Handle files short enough not to have a BOM.
if(available_ < 2)
encoding_ = encoding;
final switch(encoding_)
{
encoding_ = Encoding.UTF_8;
maxChars_ = 0;
if(available_ == 1)
{
bufferSpace_[0] = stream_.getc();
buffer_ = bufferSpace_[0 .. 1];
maxChars_ = 1;
case UTFEncoding.UTF_8: maxChars_ = available_; break;
case UTFEncoding.UTF_16: maxChars_ = available_ / 2; break;
case UTFEncoding.UTF_32: maxChars_ = available_ / 2; break;
}
return;
}
char[] rawBuffer8;
wchar[] rawBuffer16;
//readBOM will determine and set stream endianness.
switch(stream_.readBOM(2))
{
case -1:
//readBOM() eats two more bytes in this case so get them back.
const wchar bytes = stream_.getcw();
rawBuffer8_[0 .. 2] = [cast(ubyte)(bytes % 256), cast(ubyte)(bytes / 256)];
rawUsed_ = 2;
goto case 0;
case 0:
maxChars_ = available_;
encoding_ = Encoding.UTF_8;
break;
case 1, 2:
maxChars_ = available_ / 2;
//readBOM() eats two more bytes in this case so get them back.
encoding_ = Encoding.UTF_16;
rawBuffer16_[0] = stream_.getcw();
rawUsed_ = 1;
enforce(available_ % 2 == 0,
new ReaderException("Odd byte count in an UTF-16 stream"));
break;
case 3, 4:
maxChars_ = available_ / 4;
encoding_ = Encoding.UTF_32;
enforce(available_ % 4 == 0,
new ReaderException("Byte count in an UTF-32 stream not divisible by 4"));
break;
default: assert(false, "Unknown UTF BOM");
}
available_ = stream_.available;
}
/// Get maximum number of characters that might be in the stream.
@ -658,7 +625,7 @@ void testEndian(R)()
{
auto reader = new R(new MemoryStream(data));
assert(reader.encoding == encoding_expected);
assert(reader.stream_.endian == endian_expected);
assert(reader.endian_ == endian_expected);
}
ubyte[] little_endian_utf_16 = [0xFF, 0xFE, 0x7A, 0x00];
ubyte[] big_endian_utf_16 = [0xFE, 0xFF, 0x00, 0x7A];