Initial commit.

2014-08-06 15:35:59 +02:00 · 2014-08-06 15:35:59 +02:00 · 93c5c3834c
commit 93c5c3834c
4 changed files with 313 additions and 0 deletions
--- a/LICENSE_1_0.txt
+++ b/LICENSE_1_0.txt
@ -0,0 +1,23 @@
 Boost Software License - Version 1.0 - August 17th, 2003
 Permission is hereby granted, free of charge, to any person or organization
 obtaining a copy of the software and accompanying documentation covered by
 this license (the "Software") to use, reproduce, display, distribute,
 execute, and transmit the Software, and to prepare derivative works of the
 Software, and to permit third-parties to whom the Software is furnished to
 do so, all subject to the following:
 The copyright notices in the Software and this entire statement, including
 the above license grant, this restriction and the following disclaimer,
 must be included in all copies of the Software, in whole or in part, and
 all derivative works of the Software, unless such copies or derivative
 works are solely in the form of machine-executable object code generated by
 a source language processor.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
 SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
 FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 DEALINGS IN THE SOFTWARE.
--- a/README.rst
+++ b/README.rst
@ -0,0 +1,104 @@
 ==========
 TinyEndian
 ==========
 ------------
 Introduction
 ------------
 TinyEndian is a minimal endianness library for the D programming language. It has no
 external dependencies; it only needs a D compiler and Phobos (the standard library).
 TinyEndian doesn't allocate memory and is fully **@nogc** to allow use in
 high-performance code.
 The API is not stable and may change in the future.
 --------
 Features
 --------
 * Swap byte order of 2- or 4-byte elements in an array in place.
 * Read a UTF-8, UTF-16 or UTF-32 buffer, determine its endianness using a UTF
  byte-order-mark and convert it to system endianness in place.
 * No external dependencies.
 * pure, nothrow and @nogc.
 -------------------
 Directory structure
 -------------------
 ===============  =======================================================================
 Directory        Contents
 ===============  =======================================================================
 ``./``           This README file, utility scripts, TinyEndian sources outside any packages.
 ``./source``     Source code.
 ===============  =======================================================================
 -----
 Usage
 -----
 Assuming you use `dub <http://code.dlang.org/about>`_, add this line::
   "tinyendian": { "version" : "~>0.1.0" }
 to the ``"dependencies"`` in your project's ``dub.json``.
 If you don't use dub, you can directly copy the ``source/tinyendian.d`` file into your
 project.
 TinyEndian requires DMD 2.066 or equivalent GDC/LDC.
 Open ``source/tinyendian.d`` to read the API documentation.
 -------
 License
 -------
 TinyEndian is released under the terms of the
 `Boost Software License 1.0 <http://www.boost.org/LICENSE_1_0.txt>`_.
 This license allows you to use the source code in your own projects, open source
 or proprietary, and to modify it to suit your needs. However, in source
 distributions, you have to preserve the license headers in the source code and
 the accompanying license file.
 Full text of the license can be found in file ``LICENSE_1_0.txt`` and is also
 displayed here::
    Boost Software License - Version 1.0 - August 17th, 2003
    Permission is hereby granted, free of charge, to any person or organization
    obtaining a copy of the software and accompanying documentation covered by
    this license (the "Software") to use, reproduce, display, distribute,
    execute, and transmit the Software, and to prepare derivative works of the
    Software, and to permit third-parties to whom the Software is furnished to
    do so, all subject to the following:
    The copyright notices in the Software and this entire statement, including
    the above license grant, this restriction and the following disclaimer,
    must be included in all copies of the Software, in whole or in part, and
    all derivative works of the Software, unless such copies or derivative
    works are solely in the form of machine-executable object code generated by
    a source language processor.
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
    SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
    FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
    ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.
 -------
 Credits
 -------
 TinyEndian was created by Ferdinand Majerech aka Kiith-Sa kiithsacmp[AT]gmail.com .
 Parts of code based on the ``std.stream`` Phobos module.
 TinyEndian was created using Vim and DMD on Linux Mint as an endianness library for the `D
 programming language <http://www.dlang.org>`_.
--- a/package.json
+++ b/package.json
@ -0,0 +1,16 @@
 {
    "name": "tinyendian",
    "description": "Lightweight endianness handling library",
    "authors": [ "Ferdinand Majerech" ],
    "importPaths": ["source"],
    "license": "Boost 1.0",
    "homepage": "https://github.com/kiith-sa/tinyendian",
    "copyright": "Copyright © 2014, Ferdinand Majerech",
    "buildTypes":
    {
        "debug": { "buildOptions": ["debugMode", "debugInfoC"] },
        "release": { "buildOptions": ["releaseMode", "optimize", "inline", "noBoundsCheck"] },
         "profile": { "buildOptions": ["releaseMode", "optimize", "noBoundsCheck", "debugInfoC"] }
     }
 }
--- a/source/tinyendian.d
+++ b/source/tinyendian.d
@ -0,0 +1,170 @@
 //          Copyright Ferdinand Majerech 2014.
 // Distributed under the Boost Software License, Version 1.0.
 //    (See accompanying file LICENSE_1_0.txt or copy at
 //          http://www.boost.org/LICENSE_1_0.txt)
 /// A minimal library providing functionality for changing the endianness of data.
 module tinyendian;
 import core.stdc.string;
 import std.algorithm;
 import std.system;
 import std.utf;
 /// The Unicode transformation formats this module distinguishes between.
 enum UTFEncoding : ubyte
 {
    /// UTF-8 (1-byte code units).
    UTF_8  = 0,
    /// UTF-16 (2-byte code units).
    UTF_16 = 1,
    /// UTF-32 (4-byte code units).
    UTF_32 = 2
 }
@system pure nothrow @nogc:
 /// Swap byte order of items in an array in place.
 ///
 /// Every item is treated as a raw block of 2 or 4 bytes whose bytes are reversed
 /// exactly as they are stored in memory. The value of the item is never converted, so
 /// any 2- or 4-byte type works (integers, characters, floats, small structs or
 /// static arrays).
 ///
 /// Params:
 ///
 /// T     = Item type. Must be either 2 or 4 bytes long.
 /// array = Buffer with values to fix byte order of. An empty array is a no-op.
 void swapByteOrder(T)(T[] array)
    if(T.sizeof == 2 || T.sizeof == 4)
 {
    import core.bitop;
    // Reverse the bytes of every item in the buffer.
    foreach(ref item; array)
    {
        static if(T.sizeof == 2)
        {
            // Two bytes: exchanging them reverses the item.
            swap(*cast(ubyte*)&item, *(cast(ubyte*)&item + 1));
        }
        else static if(T.sizeof == 4)
        {
            // Reinterpret the storage as uint instead of value-casting the item:
            // cast(uint)item would convert a float numerically (destroying its bit
            // pattern) and does not compile for 4-byte aggregates.
            const uint swapped = bswap(*cast(const(uint)*)&item);
            item = *cast(const(T)*)&swapped;
        }
        else static assert(false, "Unsupported T: " ~ T.stringof);
    }
 }
 /// Convert a UTF-8, UTF-16 or UTF-32 encoded buffer to system endianness in place.
 ///
 /// The encoding and byte order are detected from the UTF byte-order-mark (BOM) at the
 /// start of array. Without a BOM the data is assumed to be UTF-8 (which is compatible
 /// with ASCII). A detected BOM is removed: the remaining data is moved to the start
 /// of the buffer.
 ///
 /// For UTF-16 and UTF-32, trailing bytes that do not form a complete code unit (1
 /// byte for UTF-16, 1-3 bytes for UTF-32) are stripped from the end of the buffer.
 ///
 /// Note that this function does $(B not) check if the array is a valid UTF string. It
 /// only works with the BOM and 1, 2 or 4-byte items.
 ///
 /// Params:
 ///
 /// array = The array with UTF-data. Its contents are modified.
 ///
 /// Returns:
 ///
 /// A struct with the following members:
 ///
 /// $(D ubyte[] array)            A slice of the input array containing data in correct
 ///                               byte order, without BOM and in case of UTF-16/UTF-32,
 ///                               without stripped bytes, if any.
 /// $(D UTFEncoding encoding)     Encoding of the result (UTF-8, UTF-16 or UTF-32)
 /// $(D std.system.Endian endian) Endianness of the original array. This is the system
 ///                               endianness for a UTF-8 BOM and $(D Endian.init) when
 ///                               there is no BOM.
 /// $(D uint bytesStripped)       Number of bytes stripped from a UTF-16/UTF-32 array,
 ///                               if any. This is non-zero only if array.length was not
 ///                               divisible by 2 or 4 for UTF-16 and UTF-32,
 ///                               respectively.
 ///
 /// Complexity: O(array.length)
 auto fixUTFByteOrder(ubyte[] array)
 {
    // Identifiers of the recognised BOMs. Every value except None is also the index
    // of that BOM in the byteOrderMarks and bomEndian tables below.
    enum BOM: ubyte
    {
        UTF_8     = 0,
        UTF_16_LE = 1,
        UTF_16_BE = 2,
        UTF_32_LE = 3,
        UTF_32_BE = 4,
        None      = ubyte.max
    }
    // Both tables are taken from std.stream.
    static immutable ubyte[][5] byteOrderMarks = [ [0xEF, 0xBB, 0xBF],
                                                   [0xFF, 0xFE],
                                                   [0xFE, 0xFF],
                                                   [0xFF, 0xFE, 0x00, 0x00],
                                                   [0x00, 0x00, 0xFE, 0xFF] ];
    static immutable Endian[5] bomEndian = [ std.system.endian,
                                             Endian.littleEndian,
                                             Endian.bigEndian,
                                             Endian.littleEndian,
                                             Endian.bigEndian ];
    // Return type; its members are described in the function ddoc.
    struct Result
    {
        ubyte[] array;
        UTFEncoding encoding;
        Endian endian;
        uint bytesStripped = 0;
    }
    // Look for a BOM at the start of the buffer; BOM.None means there is none.
    // The whole table is scanned and the LAST match wins: the UTF-16LE BOM is a prefix
    // of the UTF-32LE BOM, so taking the first match would misdetect UTF-32LE data.
    BOM detected = BOM.None;
    foreach(index, mark; byteOrderMarks)
    {
        if(array.startsWith(mark)) { detected = cast(BOM)index; }
    }
    Result result;
    if(detected == BOM.None) { result.endian = Endian.init; }
    else                     { result.endian = bomEndian[detected]; }
    // Length of the BOM in bytes (0 without a BOM) and size of one code unit.
    size_t bomLength = 0;
    size_t unitSize  = 1;
    final switch(detected)
    {
        case BOM.None:
            result.encoding = UTFEncoding.UTF_8;
            break;
        case BOM.UTF_8:
            result.encoding = UTFEncoding.UTF_8;
            bomLength = 3;
            break;
        case BOM.UTF_16_LE, BOM.UTF_16_BE:
            result.encoding = UTFEncoding.UTF_16;
            bomLength = 2;
            unitSize  = 2;
            break;
        case BOM.UTF_32_LE, BOM.UTF_32_BE:
            result.encoding = UTFEncoding.UTF_32;
            bomLength = 4;
            unitSize  = 4;
            break;
    }
    // Cut off trailing bytes that don't make up a whole code unit (always 0 for UTF-8).
    result.bytesStripped = cast(uint)(array.length % unitSize);
    array = array[0 .. $ - result.bytesStripped];
    // With a BOM present, shift the data over it so that it begins at array[0].
    if(bomLength != 0)
    {
        const size_t dataLength = array.length - bomLength;
        core.stdc.string.memmove(array.ptr, array.ptr + bomLength, dataLength);
        array = array[0 .. dataLength];
    }
    // The length is now a multiple of the code unit size, so the casts below are exact.
    if(std.system.endian != result.endian) final switch(result.encoding)
    {
        case UTFEncoding.UTF_8:  break;
        case UTFEncoding.UTF_16: swapByteOrder(cast(wchar[])array); break;
        case UTFEncoding.UTF_32: swapByteOrder(cast(dchar[])array); break;
    }
    result.array = array;
    return result;
 }