From 7affea320a0211a3b063163d8b99b8f1baf67914 Mon Sep 17 00:00:00 2001 From: Ferdinand Majerech Date: Tue, 22 Jul 2014 03:31:56 +0200 Subject: [PATCH] Rewrote UTFDecoder to enable nothrow. --- source/dyaml/reader.d | 87 +++++++++++++++++++++++-------------------- 1 file changed, 47 insertions(+), 40 deletions(-) diff --git a/source/dyaml/reader.d b/source/dyaml/reader.d index 0df0db8..29a3fef 100644 --- a/source/dyaml/reader.d +++ b/source/dyaml/reader.d @@ -312,33 +312,20 @@ final class Reader new ReaderException("Special unicode characters are not allowed")); } - try for(size_t c = 0; chars && !decoder_.done;) + for(size_t c = 0; chars && !decoder_.done;) { const slice = decoder_.getDChars(chars); + if(slice is null) + { + const msg = decoder_.getAndClearErrorMessage(); + throw new ReaderException( + "Unicode decoding error between bytes %s and %s : %s" + .format(oldPosition, decoder_.position, msg)); + } buffer_[oldLength + c .. oldLength + c + slice.length] = slice[]; c += slice.length; chars -= slice.length; } - catch(Exception e) - { - handleLoadCharsException(e, oldPosition); - } - } - - // Handle an exception thrown in loadChars method of any Reader. - void handleLoadCharsException(Exception e, ulong oldPosition) @system - { - try{throw e;} - catch(UTFException e) - { - const position = decoder_.position; - throw new ReaderException(format("Unicode decoding error between bytes %s and %s : %s", - oldPosition, position, e.msg)); - } - catch(ReadException e) - { - throw new ReaderException(e.msg); - } } // Code shared by loadEntireFile methods. @@ -421,6 +408,12 @@ struct UTFBlockDecoder(size_t bufferSize_) if (bufferSize_ % 2 == 0) // Buffer of decoded, UTF-32 characters. This is a slice into decodedSpace_. dchar[] decoded_; + // Current error message. + // + // To be fully nothrow, we use return values and the user (Reader) can check + // for a detailed error message if they get an error return. 
+ string errorMessage_; + public: /// Construct a UTFBlockDecoder decoding data from a buffer. this(ubyte[] buffer, UTFEncoding encoding) @trusted @@ -444,6 +437,17 @@ struct UTFBlockDecoder(size_t bufferSize_) if (bufferSize_ % 2 == 0) /// Get the current position in buffer. size_t position() @trusted { return inputAll_.length - input_.length; } + /// Get the error message and clear it; returns the message that was stored. + /// + /// Can only be used in case of an error return from e.g. getDChars(). + string getAndClearErrorMessage() @safe pure nothrow @nogc + { + assert(errorMessage_ !is null, + "Trying to get an error message when there's no error"); + const result = errorMessage_; + errorMessage_ = null; + return result; + } /// Are we done decoding? bool done() const pure @safe nothrow @nogc @@ -451,21 +455,6 @@ struct UTFBlockDecoder(size_t bufferSize_) if (bufferSize_ % 2 == 0) return rawUsed_ == 0 && decoded_.length == 0 && input_.length == 0; } - /// Get next character. - dchar getDChar() - @safe - { - if(decoded_.length) - { - const result = decoded_[0]; - decoded_ = decoded_[1 .. $]; - return result; - } - - assert(input_.length > 0 || rawUsed_ > 0); - updateBuffer(); - return getDChar(); - } /// Get as many characters as possible, but at most maxChars. Slice returned will be invalidated in further calls. const(dchar[]) getDChars(size_t maxChars = size_t.max) @@ -476,12 +465,15 @@ struct UTFBlockDecoder(size_t bufferSize_) if (bufferSize_ % 2 == 0) const slice = min(decoded_.length, maxChars); const result = decoded_[0 .. slice]; decoded_ = decoded_[slice .. $]; + assert(result !is null, + "NULL error on a getDChars call without an error"); return result; } assert(input_.length > 0 || rawUsed_ > 0); updateBuffer(); - return getDChars(maxChars); + // updateBuffer may fail + return errorMessage_ is null ? getDChars(maxChars) : null; } private: @@ -524,6 +516,8 @@ struct UTFBlockDecoder(size_t bufferSize_) if (bufferSize_ % 2 == 0) } // Decode contents of a UTF-8 or UTF-16 raw buffer. 
+ // + // On error, errorMessage_ will be set. void decodeRawBuffer(C)(C[] buffer, const size_t length) @safe pure { @@ -532,10 +526,14 @@ struct UTFBlockDecoder(size_t bufferSize_) if (bufferSize_ % 2 == 0) const end = endOfLastUTFSequence(buffer, length); // If end is 0, there are no full UTF-8 chars. // This can happen at the end of file if there is an incomplete UTF-8 sequence. - enforce(end > 0, - new ReaderException("Invalid UTF-8 character at the end of buffer")); + if(end <= 0) + { + errorMessage_ = "Invalid UTF-8 character at the end of buffer"; + return; + } decodeUTF(buffer[0 .. end]); + if(errorMessage_ !is null) { return; } // After decoding, any code points not decoded go to the start of raw buffer. rawUsed_ = length - end; @@ -588,10 +586,19 @@ struct UTFBlockDecoder(size_t bufferSize_) if (bufferSize_ % 2 == 0) decodedSpace_[bufpos++] = c; ++srcpos; } - else + else try { decodedSpace_[bufpos++] = decode(source, srcpos); } + catch(UTFException e) + { + errorMessage_ = e.msg; + return; + } + catch(Exception e) + { + assert(false, "Unexpected exception in Reader.decodeUTF " ~ e.msg); + } } decoded_ = decodedSpace_[0 .. bufpos]; }