2011-08-16 12:53:13 +00:00
|
|
|
|
2014-07-21 22:21:42 +00:00
|
|
|
// Copyright Ferdinand Majerech 2011-2014.
|
2011-08-16 12:53:13 +00:00
|
|
|
// Distributed under the Boost Software License, Version 1.0.
|
|
|
|
// (See accompanying file LICENSE_1_0.txt or copy at
|
|
|
|
// http://www.boost.org/LICENSE_1_0.txt)
|
|
|
|
|
|
|
|
module dyaml.reader;
|
|
|
|
|
|
|
|
|
2011-11-16 02:10:29 +00:00
|
|
|
import core.stdc.stdlib;
|
2011-08-16 12:53:13 +00:00
|
|
|
import core.stdc.string;
|
2011-11-16 02:10:29 +00:00
|
|
|
import core.thread;
|
2011-08-16 12:53:13 +00:00
|
|
|
|
|
|
|
import std.algorithm;
|
|
|
|
import std.conv;
|
|
|
|
import std.exception;
|
|
|
|
import std.stdio;
|
|
|
|
import std.string;
|
|
|
|
import std.system;
|
|
|
|
import std.utf;
|
|
|
|
|
2014-07-21 22:23:15 +00:00
|
|
|
import tinyendian;
|
|
|
|
|
2011-10-24 18:36:26 +00:00
|
|
|
import dyaml.fastcharsearch;
|
2011-10-11 13:58:23 +00:00
|
|
|
import dyaml.encoding;
|
2011-08-16 12:53:13 +00:00
|
|
|
import dyaml.exception;
|
2014-07-21 23:04:44 +00:00
|
|
|
import dyaml.streamcompat;
|
|
|
|
|
2011-08-16 12:53:13 +00:00
|
|
|
|
|
|
|
|
|
|
|
package:
|
|
|
|
|
2014-07-21 22:33:17 +00:00
|
|
|
//XXX VIM STUFF:
|
|
|
|
//XXX THE f/t COLORING PLUGIN, AND TRY TO REMOVE THE f/t AUTOREPEAT PLUGIN
|
|
|
|
// (AND MAYBE DO THE REPEAT WITH ALT-T/ALT-F
|
|
|
|
//XXX DDOC snippets such as $D, $BIGOH, anything else
|
|
|
|
// OR MAYBE JUST $ - EXPANDING TO $(${1} ${2})
|
|
|
|
// WHERE DEFAULT ${1} IS 'D' AND SPECIAL SNIPPETS FOR SPECIFIC DDOC MACROS
|
|
|
|
// (E.G. XREF HAS 2 ARGS)
|
|
|
|
// XXX DON'T FORGET TO COMMIT DSNIPS CHANGES
|
|
|
|
// XXX SNIPPETS: WHY CAN'T WE USE NEW IN NEW? FIX!
|
|
|
|
// XXX ALSO WRITELN VISUAL! (print whatever we have selected)
|
|
|
|
// XXX AND ``fun`` VISUAL TOO!
|
|
|
|
// XXX snippet to print variable along its name AND
|
|
|
|
// OR MULTIPLE VARS - USE std.format!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2011-08-16 12:53:13 +00:00
|
|
|
/// Exception thrown when the Reader encounters an error.
class ReaderException : YAMLException
{
    /// Construct a ReaderException.
    ///
    /// Params:  msg  = Description of the error. It is prefixed with "Reader error: ".
    ///          file = Source file the exception originates from.
    ///          line = Source line the exception originates from.
    this(string msg, string file = __FILE__, int line = __LINE__)
        @safe pure nothrow
    {
        string prefixed = "Reader error: " ~ msg;
        super(prefixed, file, line);
    }
}
|
|
|
|
|
2014-07-22 00:40:14 +00:00
|
|
|
/// Lazily reads and decodes data from a buffer, only storing as much as needed at any
|
|
|
|
/// moment.
|
2014-07-21 23:04:44 +00:00
|
|
|
///
|
2014-07-22 00:40:14 +00:00
|
|
|
/// Adds a '\0' to the end of the data.
|
2011-08-16 12:53:13 +00:00
|
|
|
final class Reader
|
|
|
|
{
|
|
|
|
private:
|
2014-07-21 23:09:27 +00:00
|
|
|
// Allocated space for buffer_.
|
2011-11-16 02:10:29 +00:00
|
|
|
dchar[] bufferAllocated_ = null;
|
2014-07-21 22:41:13 +00:00
|
|
|
// Buffer of currently loaded characters.
|
2011-11-16 02:10:29 +00:00
|
|
|
dchar[] buffer_ = null;
|
2014-07-21 22:41:13 +00:00
|
|
|
// Current position within buffer. Only data after this position can be read.
|
2011-08-16 12:53:13 +00:00
|
|
|
uint bufferOffset_ = 0;
|
2014-07-22 00:40:14 +00:00
|
|
|
// Index of the current character in the buffer.
|
2011-08-16 12:53:13 +00:00
|
|
|
size_t charIndex_ = 0;
|
2014-07-21 22:41:13 +00:00
|
|
|
// Current line in file.
|
2011-08-16 12:53:13 +00:00
|
|
|
uint line_;
|
2014-07-21 22:41:13 +00:00
|
|
|
// Current column in file.
|
2011-08-16 12:53:13 +00:00
|
|
|
uint column_;
|
2014-07-21 22:41:13 +00:00
|
|
|
// Decoder reading data from file and decoding it to UTF-32.
|
2011-11-16 02:10:29 +00:00
|
|
|
UTFFastDecoder decoder_;
|
2011-08-16 12:53:13 +00:00
|
|
|
|
2014-07-21 23:09:27 +00:00
|
|
|
version(unittest)
|
|
|
|
{
|
|
|
|
// Endianness of the input before it was converted (for testing)
|
|
|
|
Endian endian_;
|
|
|
|
}
|
|
|
|
|
2011-08-16 12:53:13 +00:00
|
|
|
public:
|
2014-07-22 00:40:44 +00:00
|
|
|
import std.stream;
|
2014-07-21 22:41:13 +00:00
|
|
|
/// Construct a Reader.
///
/// The stream is converted to a byte array with streamToBytesGC(), passed through
/// fixUTFByteOrder(), and the resulting array and encoding are handed to the decoder.
///
/// Params:  stream = Input stream. Must be readable and seekable.
///
/// Throws:  ReaderException if the stream is invalid, or if fixUTFByteOrder() had to
///          strip bytes (UTF-16/UTF-32 input not aligned to 2/4 bytes).
this(Stream stream) @trusted //!nothrow
{
    auto rawBytes = streamToBytesGC(stream);
    auto fixed    = fixUTFByteOrder(rawBytes);

    // Stripped bytes mean the input did not consist of whole code units.
    const misaligned = fixed.bytesStripped > 0;
    if(misaligned)
    {
        throw new ReaderException("Size of UTF-16 or UTF-32 input not aligned " ~
                                  "to 2 or 4 bytes, respectively");
    }

    // Remember the detected byte order so unittests can check it.
    version(unittest) { endian_ = fixed.endian; }

    decoder_ = UTFFastDecoder(fixed.array, fixed.encoding);
}
|
|
|
|
|
2014-07-21 07:56:41 +00:00
|
|
|
/// Destroy the Reader, freeing the character buffer if one was allocated.
@trusted nothrow @nogc ~this()
{
    // Nothing to release unless a buffer was actually allocated.
    if(bufferAllocated_ !is null)
    {
        free(bufferAllocated_.ptr);
        bufferAllocated_ = null;
        buffer_          = null;
    }
}
|
|
|
|
|
2014-07-21 22:41:13 +00:00
|
|
|
/// Get the character at the specified index relative to the current position.
///
/// Params:  index = Index of the character to get, relative to the current
///                  position in the buffer.
///
/// Returns: Character at the specified position.
///
/// Throws:  ReaderException if trying to read past the end of the buffer
///          or if invalid data is read while loading more characters.
dchar peek(size_t index = 0) @safe
{
    // Not enough characters loaded yet; ask for index + 1 characters.
    if(bufferOffset_ + index + 1 > buffer_.length) { updateBuffer(index + 1); }

    // updateBuffer() may reset bufferOffset_, so the position is computed only now.
    const position = bufferOffset_ + index;
    if(position >= buffer_.length)
    {
        throw new ReaderException("Trying to read past the end of the buffer");
    }

    return buffer_[position];
}
|
|
|
|
|
2014-07-21 22:41:13 +00:00
|
|
|
/// Get the specified number of characters starting at the current position.
///
/// Equivalent to slice(0, length).
///
/// Note:    This gets only a "view" into the internal buffer,
///          which WILL get invalidated after other Reader calls.
///
/// Params:  length = Number of characters to get.
///
/// Returns: Characters starting at current position or an empty slice if out of bounds.
const(dstring) prefix(size_t length) @safe
{
    const(dstring) view = slice(0, length);
    return view;
}
|
|
|
|
|
2014-07-21 22:41:13 +00:00
|
|
|
/// Get a slice view of the internal buffer.
///
/// Note:    This gets only a "view" into the internal buffer,
///          which WILL get invalidated after other Reader calls.
///
/// Params:  start = Start of the slice relative to current position.
///          end   = End of the slice relative to current position.
///
/// Returns: Slice into the internal buffer or an empty slice if out of bounds.
///          An end past the loaded data is clamped to the end of the buffer.
const(dstring) slice(size_t start, size_t end) @trusted
{
    // Try to load more characters if the requested end is not in the buffer yet.
    if(bufferOffset_ + end >= buffer_.length) { updateBuffer(end); }

    // Translate to absolute buffer indices, using bufferOffset_ as it is after the update.
    const first = bufferOffset_ + start;
    const last  = min(buffer_.length, bufferOffset_ + end);

    if(last <= first) { return ""; }
    return cast(dstring)buffer_[first .. last];
}
|
|
|
|
|
2014-07-22 00:40:14 +00:00
|
|
|
/// Get the next character and move the buffer position past it.
///
/// Returns: Next character.
///
/// Throws:  ReaderException if trying to read past the end of the buffer
///          or if invalid data is read.
dchar get() @safe
{
    // Read first, then advance; forward() also updates line/column tracking.
    const current = peek();
    forward();
    return current;
}
|
|
|
|
|
2014-07-22 00:40:14 +00:00
|
|
|
/// Get the specified number of characters and move the buffer position past them.
///
/// Params:  length = Number of characters to get.
///
/// Returns: A copy of the characters starting at the current position.
///
/// Throws:  ReaderException if trying to read past the end of the buffer
///          or if invalid data is read.
dstring get(size_t length) @safe
{
    // prefix() only returns a view into the buffer; duplicate it before advancing.
    dstring copy = prefix(length).idup;
    forward(length);
    return copy;
}
|
|
|
|
|
2014-07-21 22:41:13 +00:00
|
|
|
/// Move the current position forward, updating character index, line and column.
///
/// Params:  length = Number of characters to move position forward.
///
/// Throws:  ReaderException if loading more characters fails
///          (e.g. invalid data is read).
void forward(size_t length = 1) @safe
{
    // One character past the skipped ones is needed for the "\r\n" lookahead below.
    if(bufferOffset_ + length + 1 >= buffer_.length) { updateBuffer(length + 1); }

    mixin FastCharSearch!"\n\u0085\u2028\u2029"d search;

    for(; length > 0; --length)
    {
        const current = buffer_[bufferOffset_];
        ++bufferOffset_;
        ++charIndex_;

        // A '\r' directly followed by '\n' is not counted; the '\n' ends the line.
        const lineBreak = search.canFind(current) ||
                          (current == '\r' && buffer_[bufferOffset_] != '\n');
        if(lineBreak)
        {
            ++line_;
            column_ = 0;
        }
        // U+FEFF (byte order mark) does not advance the column.
        else if(current != '\uFEFF') { ++column_; }
    }
}
|
|
|
|
|
2014-07-22 00:40:14 +00:00
|
|
|
/// Get a Mark built from the current line and column, used for error messages.
final Mark mark() @safe pure nothrow const @nogc
{
    return Mark(line_, column_);
}

/// Get current line number.
final uint line() @safe pure nothrow const @nogc
{
    return line_;
}

/// Get current column number.
final uint column() @safe pure nothrow const @nogc
{
    return column_;
}

/// Get index of the current character in the buffer.
final size_t charIndex() @safe pure nothrow const @nogc
{
    return charIndex_;
}

/// Get encoding of the input buffer, as reported by the decoder.
final Encoding encoding() @safe pure nothrow const @nogc
{
    return decoder_.encoding;
}
|
2011-10-11 13:58:23 +00:00
|
|
|
|
2011-08-16 12:53:13 +00:00
|
|
|
private:
|
2014-07-21 22:41:13 +00:00
|
|
|
// Update buffer to be able to read length characters after buffer offset.
//
// Already consumed characters are discarded first. If there are not enough
// characters left in the input, as many as possible are loaded and a '\0'
// terminator is appended once the decoder is done.
//
// Params:  length = Number of characters we need to read.
//
// Throws:  ReaderException if invalid data is read.
void updateBuffer(const size_t length) @trusted
{
    // Get rid of consumed data by moving the unread tail to the start of the buffer.
    if(bufferOffset_ > 0)
    {
        const size_t remaining = buffer_.length - bufferOffset_;
        memmove(buffer_.ptr, buffer_.ptr + bufferOffset_, remaining * dchar.sizeof);
        buffer_       = buffer_[0 .. remaining];
        bufferOffset_ = 0;
    }

    // Request characters in batches of 512 until enough are loaded or input ends.
    while(bufferOffset_ + length >= buffer_.length)
    {
        loadChars(512);
        if(!decoder_.done) { continue; }

        // End of input: make sure the buffer ends with a '\0'.
        const terminated = buffer_.length > 0 && buffer_[$ - 1] == '\0';
        if(!terminated)
        {
            const newLength = buffer_.length + 1;
            bufferReserve(newLength);
            buffer_ = bufferAllocated_[0 .. newLength];
            buffer_[$ - 1] = '\0';
        }
        break;
    }
}
|
|
|
|
|
2014-07-21 22:41:13 +00:00
|
|
|
// Load more characters to the buffer.
//
// Params:  chars = Number of characters to try to load.
//                  Fewer will be loaded if the decoder runs out of input.
//
// Throws:  ReaderException on Unicode decoding error, or
//          if nonprintable characters are detected.
void loadChars(size_t chars) @safe
{
    const startLength   = buffer_.length;
    const startPosition = decoder_.position;

    // Grow the buffer by the full requested amount up front.
    bufferReserve(startLength + chars);
    buffer_ = bufferAllocated_[0 .. startLength + chars];

    scope(success)
    {
        // By now `chars` is the number of characters that could NOT be loaded;
        // cut that unused tail off and validate only what was newly added.
        buffer_ = buffer_[0 .. $ - chars];
        if(!printable(buffer_[startLength .. $]))
        {
            throw new ReaderException("Special unicode characters are not allowed");
        }
    }

    size_t loaded = 0;
    while(chars > 0 && !decoder_.done)
    {
        const decoded = decoder_.getDChars(chars);
        // A null slice signals a decoding error; details come from the decoder.
        if(decoded is null)
        {
            const msg = decoder_.getAndClearErrorMessage();
            throw new ReaderException(
                "Unicode decoding error between bytes %s and %s : %s"
                .format(startPosition, decoder_.position, msg));
        }

        const target = startLength + loaded;
        buffer_[target .. target + decoded.length] = decoded[];
        loaded += decoded.length;
        chars  -= decoded.length;
    }
}
|
|
|
|
|
2014-07-21 22:41:13 +00:00
|
|
|
// Code shared by loadEntireFile methods.
//
// Reserves space for the decoder's maximum character count plus a terminator,
// loads that many characters and appends a '\0' unless the buffer already ends with one.
void loadEntireFile_() @safe
{
    const capacity = decoder_.maxChars;
    // + 1 leaves room for the '\0' terminator added below.
    bufferReserve(capacity + 1);
    loadChars(capacity);

    const terminated = buffer_.length > 0 && buffer_[$ - 1] == '\0';
    if(terminated) { return; }

    buffer_ = bufferAllocated_[0 .. buffer_.length + 1];
    buffer_[$ - 1] = '\0';
}
|
|
|
|
|
2014-07-21 22:41:13 +00:00
|
|
|
// Ensure there is space for at least capacity characters in bufferAllocated_.
//
// Does nothing if the current allocation is already large enough. Otherwise the
// buffer is allocated (first call) or reallocated, and buffer_ is re-pointed at
// the new storage with its length unchanged.
//
// Params:  capacity = Minimum number of characters bufferAllocated_ must hold.
//
// Halts the program (assert(false)) if the allocation fails, rather than
// continuing with a slice over a null pointer.
void bufferReserve(const size_t capacity) @trusted nothrow @nogc
{
    if(bufferAllocated_ !is null && bufferAllocated_.length >= capacity){return;}

    // Handle first allocation as well as reallocation.
    auto ptr = bufferAllocated_ !is null
               ? realloc(bufferAllocated_.ptr, capacity * dchar.sizeof)
               : malloc(capacity * dchar.sizeof);

    // malloc(0) may legitimately return null, so only a failed non-empty
    // allocation is treated as out of memory.
    if(ptr is null && capacity > 0)
    {
        assert(false, "Reader: out of memory when allocating the character buffer");
    }

    bufferAllocated_ = (cast(dchar*)ptr)[0 .. capacity];
    buffer_ = bufferAllocated_[0 .. buffer_.length];
}
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
|
|
|
alias UTFBlockDecoder!512 UTFFastDecoder;
|
|
|
|
|
2014-07-22 00:40:14 +00:00
|
|
|
/// Decodes a buffer to UTF-32 in blocks.
|
2011-11-16 02:10:29 +00:00
|
|
|
struct UTFBlockDecoder(size_t bufferSize_) if (bufferSize_ % 2 == 0)
|
|
|
|
{
|
|
|
|
private:
|
2014-07-21 22:33:17 +00:00
|
|
|
// UTF-8 sequence lengths indexed by first byte (0xFF marks bytes that can't start a sequence).
|
2011-11-16 02:10:29 +00:00
|
|
|
static immutable ubyte[256] utf8Stride =
|
|
|
|
[
|
|
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
|
|
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
|
|
|
|
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
|
|
|
|
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
|
|
|
|
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
|
|
|
|
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
|
|
|
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
|
|
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
|
|
|
|
4,4,4,4,4,4,4,4,5,5,5,5,6,6,0xFF,0xFF,
|
|
|
|
];
|
|
|
|
|
2014-07-22 00:40:14 +00:00
|
|
|
// Encoding of the input buffer.
|
2014-07-21 22:23:15 +00:00
|
|
|
UTFEncoding encoding_;
|
2014-07-22 00:40:14 +00:00
|
|
|
// Maximum number of characters that might be in the buffer.
|
2011-11-16 02:10:29 +00:00
|
|
|
size_t maxChars_;
|
2014-07-22 00:37:06 +00:00
|
|
|
// The entire input buffer.
|
|
|
|
ubyte[] inputAll_;
|
|
|
|
// Part of the input buffer that has not yet been decoded.
|
|
|
|
ubyte[] input_;
|
2011-11-16 02:10:29 +00:00
|
|
|
|
2014-07-21 22:33:17 +00:00
|
|
|
// Buffer used to store raw UTF-8 or UTF-16 code units.
|
2011-11-16 02:10:29 +00:00
|
|
|
union
|
|
|
|
{
|
|
|
|
char[bufferSize_] rawBuffer8_;
|
|
|
|
wchar[bufferSize_ / 2] rawBuffer16_;
|
|
|
|
}
|
2014-07-21 22:33:17 +00:00
|
|
|
// Used space (in items) in rawBuffer8_/rawBuffer16_.
|
2011-11-16 02:10:29 +00:00
|
|
|
size_t rawUsed_;
|
|
|
|
|
2014-07-22 00:37:06 +00:00
|
|
|
// Space used by decoded_.
|
|
|
|
dchar[bufferSize_] decodedSpace_;
|
|
|
|
// Buffer of decoded, UTF-32 characters. This is a slice into decodedSpace_.
|
|
|
|
dchar[] decoded_;
|
2011-11-16 02:10:29 +00:00
|
|
|
|
2014-07-22 01:31:56 +00:00
|
|
|
// Current error message.
|
|
|
|
//
|
|
|
|
// To be fully nothrow, we use return values and the user (Reader) can check
|
|
|
|
// for a detailed error message if they get an error return.
|
|
|
|
string errorMessage_;
|
|
|
|
|
2011-11-16 02:10:29 +00:00
|
|
|
public:
|
2014-07-22 00:15:33 +00:00
|
|
|
/// Construct a UTFBlockDecoder decoding data from a buffer.
///
/// Params:  buffer   = Bytes to decode. The decoder keeps slices of this array.
///          encoding = Encoding of the data in buffer.
this(ubyte[] buffer, UTFEncoding encoding) @safe pure nothrow @nogc
{
    inputAll_ = buffer;
    input_    = inputAll_[];
    encoding_ = encoding;

    // Upper bound on the character count: one character needs at least one
    // code unit, i.e. 1 byte in UTF-8, 2 bytes in UTF-16 and 4 bytes in UTF-32.
    final switch(encoding_)
    {
        case UTFEncoding.UTF_8:  maxChars_ = input_.length;     break;
        case UTFEncoding.UTF_16: maxChars_ = input_.length / 2; break;
        case UTFEncoding.UTF_32: maxChars_ = input_.length / 4; break;
    }
}
|
|
|
|
|
2014-07-22 00:40:14 +00:00
|
|
|
/// Get maximum number of characters that might be in the buffer.
size_t maxChars() const pure @safe nothrow @nogc
{
    return maxChars_;
}

/// Get encoding we're decoding from.
UTFEncoding encoding() const pure @safe nothrow @nogc
{
    return encoding_;
}

/// Get the current position in buffer, i.e. the number of input bytes consumed so far.
size_t position() @safe pure nothrow const @nogc
{
    const consumed = inputAll_.length - input_.length;
    return consumed;
}
|
|
|
|
|
2014-07-22 01:31:56 +00:00
|
|
|
/// Get the error message and clear it.
///
/// Can only be used in case of an error return from e.g. getDChars().
///
/// Returns: The error message that was set before this call.
string getAndClearErrorMessage() @safe pure nothrow @nogc
{
    assert(errorMessage_ !is null,
           "Trying to get an error message when there's no error");
    // Save the message before clearing it; returning errorMessage_ after the
    // reset would always hand back null.
    const result = errorMessage_;
    errorMessage_ = null;
    return result;
}
|
2014-07-22 00:11:16 +00:00
|
|
|
|
2014-07-21 22:33:17 +00:00
|
|
|
/// Are we done decoding?
///
/// True only when no raw code units are pending, no decoded characters are
/// waiting to be returned and no undecoded input remains.
bool done() const pure @safe nothrow @nogc
{
    const pending = rawUsed_ != 0 || decoded_.length != 0 || input_.length != 0;
    return !pending;
}
|
|
|
|
|
2014-07-22 01:33:36 +00:00
|
|
|
/// Get as many characters as possible, but at most maxChars.
///
/// Params:  maxChars = Maximum number of characters to return.
///
/// Returns: A slice with decoded characters or NULL on failure (in that case,
///          check getAndClearErrorMessage()). The slice $(B will) be invalidated
///          in further calls.
const(dchar[]) getDChars(size_t maxChars = size_t.max) @safe pure nothrow
{
    if(decoded_.length == 0)
    {
        // Nothing decoded is left; decode another block and retry.
        assert(input_.length > 0 || rawUsed_ > 0);
        updateBuffer();
        // updateBuffer may fail
        if(errorMessage_ !is null) { return null; }
        return getDChars(maxChars);
    }

    // Hand out the front of the decoded characters and keep the rest for later calls.
    const count = min(decoded_.length, maxChars);
    const chunk = decoded_[0 .. count];
    decoded_ = decoded_[count .. $];
    assert(chunk !is null,
           "NULL error on a getDChars call without an error");
    return chunk;
}
|
|
|
|
|
|
|
|
private:
|
2014-07-19 13:38:09 +00:00
|
|
|
// Copy the next block of input into the raw buffer and decode it into decoded_.
//
// UTF-8 and UTF-16 input is appended after any leftover code units in the raw
// buffer and then decoded; UTF-32 input is copied straight to the decoded space.
//
// On error, errorMessage_ will be set.
void updateBuffer() @trusted pure nothrow
{
    assert(decoded_.length == 0,
           "updateBuffer can only be called when the buffer is empty");

    final switch(encoding_)
    {
        case UTFEncoding.UTF_8:
            // Fill the free part of the raw buffer, or take whatever input is left.
            const byteCount = min(bufferSize_ - rawUsed_, input_.length);
            rawBuffer8_[rawUsed_ .. rawUsed_ + byteCount] =
                cast(char[])input_[0 .. byteCount];
            input_ = input_[byteCount .. $];
            // rawUsed_ + byteCount is the length of valid data in rawBuffer8_.
            decodeRawBuffer(rawBuffer8_, rawUsed_ + byteCount);
            break;

        case UTFEncoding.UTF_16:
            const wordCount = min((bufferSize_ / 2) - rawUsed_, input_.length / 2);
            const byteCount = 2 * wordCount;
            rawBuffer16_[rawUsed_ .. rawUsed_ + wordCount] =
                cast(wchar[])input_[0 .. byteCount];
            input_ = input_[byteCount .. $];
            // rawUsed_ + wordCount is the length of valid data in rawBuffer16_.
            decodeRawBuffer(rawBuffer16_, rawUsed_ + wordCount);
            break;

        case UTFEncoding.UTF_32:
            // UTF-32 needs no decoding; whole characters are copied directly.
            const charCount = min(bufferSize_ / 4, input_.length / 4);
            const byteCount = 4 * charCount;
            decodedSpace_[0 .. charCount] = cast(dchar[])input_[0 .. byteCount];
            input_   = input_[byteCount .. $];
            decoded_ = decodedSpace_[0 .. charCount];
            break;
    }
}
|
|
|
|
|
2014-07-19 13:38:09 +00:00
|
|
|
// Decode contents of a UTF-8 or UTF-16 raw buffer.
//
// Params:  buffer = Raw buffer to decode.
//          length = Number of valid code units in buffer.
//
// On error, errorMessage_ will be set.
void decodeRawBuffer(C)(C[] buffer, const size_t length)
    @safe pure nothrow
{
    // End of the part of the buffer that holds only complete sequences.
    const complete = endOfLastUTFSequence(buffer, length);

    // No complete sequence at all; this can happen at the end of input
    // if it stops in the middle of a sequence.
    if(complete == 0)
    {
        errorMessage_ = "Invalid UTF-8 character at the end of buffer";
        return;
    }

    decodeUTF(buffer[0 .. complete]);
    if(errorMessage_ !is null) { return; }

    // Code units of the trailing incomplete sequence move to the start of the
    // buffer so the next block of input can be appended after them.
    rawUsed_ = length - complete;
    for(size_t i = 0; i < rawUsed_; ++i)
    {
        buffer[i] = buffer[i + complete];
    }
}
|
|
|
|
|
2014-07-19 13:38:09 +00:00
|
|
|
// Determine the end of last UTF-8 or UTF-16 sequence in a raw buffer.
//
// Params:  buffer = Raw buffer to examine.
//          max    = Number of valid code units in buffer.
//
// Returns: Index one past the last complete sequence; 0 if there is none.
size_t endOfLastUTFSequence(C)(const C[] buffer, const size_t max)
    @safe pure nothrow const @nogc
{
    static if(is(C == char))
    {
        // Walk backwards to the last byte that can start a sequence.
        for(size_t i = max; i > 0; --i)
        {
            const start  = i - 1;
            const length = utf8Stride[buffer[start]];
            if(length == 0xFF) { continue; }

            // If that sequence would run past max it is incomplete and the usable
            // data ends where it starts; otherwise everything up to max is usable.
            // (Invalid code points are caught later, at decoding.)
            return (length > max - start) ? start : max;
        }
        return 0;
    }
    else
    {
        // Walk forward sequence by sequence until the next one would not fit.
        size_t end = 0;
        while(end < max)
        {
            const next = stride(buffer, end) + end;
            if(next > max) { break; }
            end = next;
        }
        return end;
    }
}
|
|
|
|
|
2014-07-19 13:38:09 +00:00
|
|
|
// Decode a UTF-8 or UTF-16 buffer (with no incomplete sequences at the end).
//
// Decoded characters are written to decodedSpace_ and decoded_ is set to the
// written part.
//
// On error, sets errorMessage_ and leaves decoded_ untouched.
void decodeUTF(C)(const C[] source) @safe pure nothrow
{
    size_t written = 0;
    size_t srcpos  = 0;
    while(srcpos < source.length)
    {
        const unit = source[srcpos];

        // Code units below 0x80 are single ASCII characters; copy them directly.
        if(unit < 0x80)
        {
            decodedSpace_[written++] = unit;
            ++srcpos;
            continue;
        }

        try
        {
            // decode() advances srcpos past the whole sequence.
            decodedSpace_[written++] = decode(source, srcpos);
        }
        catch(UTFException e)
        {
            errorMessage_ = e.msg;
            return;
        }
        catch(Exception e)
        {
            assert(false, "Unexpected exception in Reader.decodeUTF " ~ e.msg);
        }
    }

    decoded_ = decodedSpace_[0 .. written];
}
|
|
|
|
}
|
|
|
|
|
2014-07-19 13:38:09 +00:00
|
|
|
/// Determine if all characters in an array are printable.
///
/// A character is printable if it is in the YAML c-printable set:
/// tab, line feed, carriage return, U+0085, U+0020-U+007E, U+00A0-U+D7FF,
/// U+E000-U+FFFD or U+10000-U+10FFFF.
///
/// Params:  chars = Characters to check.
///
/// Returns: True if all the characters are printable (also for an empty array),
///          false otherwise.
bool printable(const dchar[] chars) @safe pure nothrow @nogc
{
    foreach(c; chars)
    {
        const allowed = (c == 0x09 || c == 0x0A || c == 0x0D || c == 0x85) ||
                        (c >= 0x20    && c <= 0x7E)   ||
                        (c >= 0xA0    && c <= 0xD7FF) ||
                        (c >= 0xE000  && c <= 0xFFFD) ||
                        // Supplementary planes are printable as well.
                        (c >= 0x10000 && c <= 0x10FFFF);
        if(!allowed) { return false; }
    }
    return true;
}
|
|
|
|
|
2014-07-22 01:33:50 +00:00
|
|
|
// Unittests.
|
2011-11-16 02:10:29 +00:00
|
|
|
|
2014-07-22 00:40:44 +00:00
|
|
|
import std.stream;
|
2011-11-16 02:10:29 +00:00
|
|
|
// Check that a reader of type R detects encoding and byte order of BOM-prefixed UTF-16 input.
void testEndian(R)()
{
    writeln(typeid(R).toString() ~ ": endian unittest");

    // Build a reader over the bytes and compare what it detected with the expectation.
    void check(ubyte[] bytes, Encoding expectedEncoding, Endian expectedEndian)
    {
        auto reader = new R(new MemoryStream(bytes));
        assert(reader.encoding == expectedEncoding);
        assert(reader.endian_ == expectedEndian);
    }

    // 'z' (U+007A) preceded by the little/big endian UTF-16 byte order mark.
    ubyte[] utf16LE = [0xFF, 0xFE, 0x7A, 0x00];
    ubyte[] utf16BE = [0xFE, 0xFF, 0x00, 0x7A];

    check(utf16LE, Encoding.UTF_16, Endian.littleEndian);
    check(utf16BE, Encoding.UTF_16, Endian.bigEndian);
}
|
|
|
|
|
|
|
|
// Check peek(), prefix() and forward() of a reader of type R on a short UTF-8 input.
void testPeekPrefixForward(R)()
{
    writeln(typeid(R).toString() ~ ": peek/prefix/forward unittest");

    ubyte[] input = ByteOrderMarks[BOM.UTF8] ~ cast(ubyte[])"data";
    auto reader = new R(new MemoryStream(input));

    // The four characters are followed by the '\0' terminator the reader appends.
    foreach(i, expected; "data\0"d)
    {
        assert(reader.peek(i) == expected);
    }

    // A prefix longer than the data is cut at the end of the buffer.
    assert(reader.prefix(4) == "data");
    assert(reader.prefix(6) == "data\0");

    // After skipping "da", index 1 is the last 'a' and index 3 is past the terminator.
    reader.forward(2);
    assert(reader.peek(1) == 'a');
    assert(collectException(reader.peek(3)));
}
|
|
|
|
|
|
|
|
// Check that a reader of type R decodes the same text from UTF-8, UTF-16 and UTF-32 input.
void testUTF(R)()
{
    writeln(typeid(R).toString() ~ ": UTF formats unittest");

    dchar[] data = cast(dchar[])"data";

    // Prefix the raw bytes of `text` with the given BOM and read them back.
    void utf_test(T)(T[] text, BOM bom)
    {
        auto rawText = (cast(ubyte*)text.ptr)[0 .. text.length * T.sizeof];
        ubyte[] bytes = ByteOrderMarks[bom] ~ rawText;
        auto reader = new R(new MemoryStream(bytes));

        foreach(i, expected; "data"d)
        {
            assert(reader.peek(i) == expected);
        }
    }

    // UTF-16 and UTF-32 data is in native byte order, so the BOM must match it.
    const bigEndian = endian == Endian.bigEndian;

    utf_test!char(to!(char[])(data), BOM.UTF8);
    utf_test!wchar(to!(wchar[])(data), bigEndian ? BOM.UTF16BE : BOM.UTF16LE);
    utf_test(data, bigEndian ? BOM.UTF32BE : BOM.UTF32LE);
}
|
|
|
|
|
2014-07-21 07:53:58 +00:00
|
|
|
// Check that a reader of type R handles input consisting of a single byte.
void test1Byte(R)()
{
    writeln(typeid(R).toString() ~ ": 1 byte file unittest");

    // 97 is ASCII 'a'.
    ubyte[] singleByte = [97];
    auto reader = new R(new MemoryStream(singleByte));

    assert(reader.peek() == 'a');
    // The reader appends a '\0' terminator after the data...
    assert(reader.peek(1) == '\0');
    // ...and anything beyond it is out of bounds.
    assert(collectException(reader.peek(2)));
}
|
|
|
|
|
2011-11-16 02:10:29 +00:00
|
|
|
// Run all reader tests against Reader.
unittest
{
    testEndian!Reader();
    testPeekPrefixForward!Reader();
    testUTF!Reader();
    test1Byte!Reader();
}
|