tinyendian is now a DUB package.

2014-08-06 16:15:02 +02:00 · 2014-08-06 16:15:02 +02:00 · 494dcd30d9
commit 494dcd30d9
parent 510065b111
2 changed files with 1 addition and 171 deletions
--- a/package.json
+++ b/package.json
@@ -5,7 +5,7 @@
    "libs": [],
    "importPaths": ["source"],
    "license": "Boost 1.0",
-    "dependencies": {},
+    "dependencies": { "tinyendian" : { "version" : "~>0.1.0" } },
    "homepage": "https://github.com/kiith-sa/D-YAML",
    "copyright": "Copyright © 2011-2014, Ferdinand Majerech",

--- a/source/tinyendian.d
+++ b/source/tinyendian.d
@@ -1,170 +0,0 @@
-//          Copyright Ferdinand Majerech 2014.
-// Distributed under the Boost Software License, Version 1.0.
-//    (See accompanying file LICENSE_1_0.txt or copy at
-//          http://www.boost.org/LICENSE_1_0.txt)
-
-module tinyendian;
-
-
-import core.stdc.string;
-
-import std.algorithm;
-import std.system;
-import std.utf;
-
-// TODO tinyendian should be its own DUB package
-
-/// Unicode UTF encodings.
-enum UTFEncoding : ubyte
-{
-    UTF_8,
-    UTF_16,
-    UTF_32
-}
-
-@system pure nothrow @nogc:
-
-/// Swap byte order of items in an array in place.
-///
-/// Params:
-///
-/// T     = Item type. Must be either 2 or 4 bytes long.
-/// array = Buffer with values to fix byte order of.
-void swapByteOrder(T)(T[] array)
-    if([2, 4].canFind(T.sizeof))
-{
-    import core.bitop;
-    // Swap the byte order of all read characters.
-    foreach(ref item; array)
-    {
-        static if(T.sizeof == 2)
-        {
-            swap(*cast(ubyte*)&item, *(cast(ubyte*)&item + 1));
-        }
-        else static if(T.sizeof == 4)
-        {
-            item = bswap(cast(uint)item);
-        }
-        else static assert(false, "Unsupported T: " ~ T.stringof);
-    }
-}
-
-/// Convert byte order of an array encoded in UTF(8/16/32) to system endianness in
-/// place.
-///
-/// Uses the UTF byte-order-mark (BOM) to determine UTF encoding. If there is no BOM
-/// at the beginning of array, UTF-8 is assumed (this is compatible with ASCII). The
-/// BOM, if any, will be removed from the buffer.
-///
-/// If the encoding is determined to be UTF-16 or UTF-32 and there aren't enough bytes
-/// for the last code unit (i.e. if array.length is odd for UTF-16 or not divisible by
-/// 4 for UTF-32), the extra bytes (1 for UTF-16, 1-3 for UTF-32) are stripped.
-///
-/// Note that this function does $(B not) check if the array is a valid UTF string. It
-/// only works with the BOM and 1,2 or 4-byte items.
-///
-/// Params:
-///
-/// array = The array with UTF-data.
-///
-/// Returns:
-///
-/// A struct with the following members:
-///
-/// $(D ubyte[] array)            A slice of the input array containing data in correct
-///                               byte order, without BOM and in case of UTF-16/UTF-32,
-///                               without stripped bytes, if any.
-/// $(D UTFEncoding encoding)     Encoding of the result (UTF-8, UTF-16 or UTF-32)
-/// $(D std.system.Endian endian) Endianness of the original array.
-/// $(D uint bytesStripped)       Number of bytes stripped from a UTF-16/UTF-32 array,
-///                               if any. This is non-zero only if array.length was not
-///                               divisible by 2 or 4 for UTF-16 and UTF-32,
-///                               respectively.
-///
-/// Complexity: (BIGOH array.length)
-auto fixUTFByteOrder(ubyte[] array)
-{
-    // Enumerates UTF BOMs, matching indices to byteOrderMarks/bomEndian.
-    enum BOM: ubyte
-    {
-        UTF_8     = 0,
-        UTF_16_LE = 1,
-        UTF_16_BE = 2,
-        UTF_32_LE = 3,
-        UTF_32_BE = 4,
-        None      = ubyte.max
-    }
-
-    // These 2 are from std.stream
-    static immutable ubyte[][5] byteOrderMarks = [ [0xEF, 0xBB, 0xBF],
-                                                   [0xFF, 0xFE],
-                                                   [0xFE, 0xFF],
-                                                   [0xFF, 0xFE, 0x00, 0x00],
-                                                   [0x00, 0x00, 0xFE, 0xFF] ];
-    static immutable Endian[5] bomEndian = [ std.system.endian,
-                                             Endian.littleEndian,
-                                             Endian.bigEndian,
-                                             Endian.littleEndian, 
-                                             Endian.bigEndian ];
-
-    // Documented in function ddoc.
-    struct Result
-    {
-        ubyte[] array;
-        UTFEncoding encoding;
-        Endian endian;
-        uint bytesStripped = 0;
-    }
-    Result result;
-
-    // Detect BOM, if any, in the bytes we've read. -1 means no BOM.
-    // Need the last match: First 2 bytes of UTF-32LE BOM match the UTF-16LE BOM. If we
-    // used the first match, UTF-16LE would be detected when we have a UTF-32LE BOM.
-    BOM bomId = BOM.None;
-    foreach(i, bom; byteOrderMarks) if(array.startsWith(bom))
-    {
-        bomId = cast(BOM)i;
-    }
-
-    result.endian = (bomId != BOM.None) ? bomEndian[bomId] : Endian.init;
-
-    // Start of UTF data (after BOM, if any)
-    size_t start = 0;
-    // If we've read more than just the BOM, put the rest into the array.
-    with(BOM) final switch(bomId)
-    {
-        case None: result.encoding = UTFEncoding.UTF_8; break;
-        case UTF_8:
-            start = 3;
-            result.encoding = UTFEncoding.UTF_8;
-            break;
-        case UTF_16_LE, UTF_16_BE:
-            result.bytesStripped = array.length % 2;
-            start = 2;
-            result.encoding = UTFEncoding.UTF_16;
-            break;
-        case UTF_32_LE, UTF_32_BE:
-            result.bytesStripped = array.length % 4;
-            start = 4;
-            result.encoding = UTFEncoding.UTF_32;
-            break;
-    }
-
-    array = array[0 .. $ - result.bytesStripped];
-    // If there's a BOM, we need to move data back to ensure it starts at array[0]
-    if(start != 0)
-    {
-        core.stdc.string.memmove(array.ptr, array.ptr + start, array.length - start);
-        array = array[0 .. $ - start];
-    }
-
-    // We enforce above that array.length is divisible by 2/4 for UTF-16/32
-    if(std.system.endian != result.endian)
-    {
-        if(result.encoding == UTFEncoding.UTF_16)      { swapByteOrder(cast(wchar[])array); }
-        else if(result.encoding == UTFEncoding.UTF_32) { swapByteOrder(cast(dchar[])array); }
-    }
-
-    result.array = array;
-    return result;
-}