From 7affea320a0211a3b063163d8b99b8f1baf67914 Mon Sep 17 00:00:00 2001
From: Ferdinand Majerech <kiithsacmp@gmail.com>
Date: Tue, 22 Jul 2014 03:31:56 +0200
Subject: [PATCH] Rewrote UTFDecoder to enable nothrow.

---
 source/dyaml/reader.d | 87 +++++++++++++++++++++++--------------------
 1 file changed, 47 insertions(+), 40 deletions(-)

diff --git a/source/dyaml/reader.d b/source/dyaml/reader.d
index 0df0db8..29a3fef 100644
--- a/source/dyaml/reader.d
+++ b/source/dyaml/reader.d
@@ -312,33 +312,20 @@ final class Reader
                         new ReaderException("Special unicode characters are not allowed"));
             }
 
-            try for(size_t c = 0; chars && !decoder_.done;)
+            for(size_t c = 0; chars && !decoder_.done;)
             {
                 const slice = decoder_.getDChars(chars);
+                if(slice is null)
+                {
+                    const msg = decoder_.getAndClearErrorMessage();
+                    throw new ReaderException(
+                              "Unicode decoding error between bytes %s and %s : %s"
+                              .format(oldPosition, decoder_.position, msg));
+                }
                 buffer_[oldLength + c .. oldLength + c + slice.length] = slice[];
                 c += slice.length;
                 chars -= slice.length;
             }
-            catch(Exception e)
-            {
-                handleLoadCharsException(e, oldPosition);
-            }
-        }
-
-        // Handle an exception thrown in loadChars method of any Reader.
-        void handleLoadCharsException(Exception e, ulong oldPosition) @system
-        {
-            try{throw e;}
-            catch(UTFException e)
-            {
-                const position = decoder_.position;
-                throw new ReaderException(format("Unicode decoding error between bytes %s and %s : %s",
-                                          oldPosition, position, e.msg));
-            }
-            catch(ReadException e)
-            {
-                throw new ReaderException(e.msg);
-            }
         }
 
         // Code shared by loadEntireFile methods.
@@ -421,6 +408,12 @@ struct UTFBlockDecoder(size_t bufferSize_) if (bufferSize_ % 2 == 0)
         // Buffer of decoded, UTF-32 characters. This is a slice into decodedSpace_.
         dchar[] decoded_;
 
+        // Current error message.
+        //
+        // To be fully nothrow, we use return values and the user (Reader) can check
+        // for a detailed error message if they get an error return.
+        string errorMessage_;
+
     public:
         /// Construct a UTFBlockDecoder decoding data from a buffer.
         this(ubyte[] buffer, UTFEncoding encoding) @trusted
@@ -444,6 +437,17 @@ struct UTFBlockDecoder(size_t bufferSize_) if (bufferSize_ % 2 == 0)
 
         /// Get the current position in buffer.
         size_t position() @trusted { return inputAll_.length - input_.length; }
+        /// Get the error message and clear it.
+        ///
+        /// Returns the saved message; only valid after an error return from e.g. getDChars().
+        string getAndClearErrorMessage() @safe pure nothrow @nogc
+        {
+            assert(errorMessage_ !is null,
+                   "Trying to get an error message when there's no error");
+            const result = errorMessage_;
+            errorMessage_ = null;
+            return result; // not errorMessage_: that was just reset to null above
+        }
 
         /// Are we done decoding?
         bool done() const pure @safe nothrow @nogc
@@ -451,21 +455,6 @@ struct UTFBlockDecoder(size_t bufferSize_) if (bufferSize_ % 2 == 0)
             return rawUsed_ == 0 && decoded_.length == 0 && input_.length == 0;
         }
 
-        /// Get next character.
-        dchar getDChar()
-            @safe
-        {
-            if(decoded_.length)
-            {
-                const result = decoded_[0];
-                decoded_ = decoded_[1 .. $];
-                return result;
-            }
-
-            assert(input_.length > 0 || rawUsed_ > 0);
-            updateBuffer();
-            return getDChar();
-        }
 
         /// Get as many characters as possible, but at most maxChars. Slice returned will be invalidated in further calls.
         const(dchar[]) getDChars(size_t maxChars = size_t.max)
@@ -476,12 +465,15 @@ struct UTFBlockDecoder(size_t bufferSize_) if (bufferSize_ % 2 == 0)
                 const slice = min(decoded_.length, maxChars);
                 const result = decoded_[0 .. slice];
                 decoded_ = decoded_[slice .. $];
+                assert(result !is null,
+                       "NULL error on a getDChars call without an error");
                 return result;
             }
 
             assert(input_.length > 0 || rawUsed_ > 0);
             updateBuffer();
-            return getDChars(maxChars);
+            // updateBuffer may fail
+            return errorMessage_ is null ? getDChars(maxChars) : null;
         }
 
     private:
@@ -524,6 +516,8 @@ struct UTFBlockDecoder(size_t bufferSize_) if (bufferSize_ % 2 == 0)
         }
 
         // Decode contents of a UTF-8 or UTF-16 raw buffer.
+        //
+        // On error, errorMessage_ will be set.
         void decodeRawBuffer(C)(C[] buffer, const size_t length)
             @safe pure
         {
@@ -532,10 +526,14 @@ struct UTFBlockDecoder(size_t bufferSize_) if (bufferSize_ % 2 == 0)
             const end = endOfLastUTFSequence(buffer, length);
             // If end is 0, there are no full UTF-8 chars.
             // This can happen at the end of file if there is an incomplete UTF-8 sequence.
-            enforce(end > 0,
-                    new ReaderException("Invalid UTF-8 character at the end of buffer"));
+            if(end <= 0)
+            {
+                errorMessage_ = "Invalid UTF-8 character at the end of buffer";
+                return;
+            }
 
             decodeUTF(buffer[0 .. end]);
+            if(errorMessage_ !is null) { return; }
 
             // After decoding, any code points not decoded go to the start of raw buffer.
             rawUsed_ = length - end;
@@ -588,10 +586,19 @@ struct UTFBlockDecoder(size_t bufferSize_) if (bufferSize_ % 2 == 0)
                     decodedSpace_[bufpos++] = c;
                     ++srcpos;
                 }
-                else
+                else try
                 {
                     decodedSpace_[bufpos++] = decode(source, srcpos);
                 }
+                catch(UTFException e)
+                {
+                    errorMessage_ = e.msg;
+                    return;
+                }
+                catch(Exception e)
+                {
+                    assert(false, "Unexpected exception in Reader.decodeUTF " ~ e.msg);
+                }
             }
             decoded_ = decodedSpace_[0 .. bufpos];
         }