Initial commit.
This commit is contained in:
commit
93c5c3834c
23
LICENSE_1_0.txt
Normal file
23
LICENSE_1_0.txt
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
Boost Software License - Version 1.0 - August 17th, 2003
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person or organization
|
||||||
|
obtaining a copy of the software and accompanying documentation covered by
|
||||||
|
this license (the "Software") to use, reproduce, display, distribute,
|
||||||
|
execute, and transmit the Software, and to prepare derivative works of the
|
||||||
|
Software, and to permit third-parties to whom the Software is furnished to
|
||||||
|
do so, all subject to the following:
|
||||||
|
|
||||||
|
The copyright notices in the Software and this entire statement, including
|
||||||
|
the above license grant, this restriction and the following disclaimer,
|
||||||
|
must be included in all copies of the Software, in whole or in part, and
|
||||||
|
all derivative works of the Software, unless such copies or derivative
|
||||||
|
works are solely in the form of machine-executable object code generated by
|
||||||
|
a source language processor.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
|
||||||
|
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
|
||||||
|
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
|
||||||
|
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||||
|
DEALINGS IN THE SOFTWARE.
|
104
README.rst
Normal file
104
README.rst
Normal file
|
@ -0,0 +1,104 @@
|
||||||
|
==========
|
||||||
|
TinyEndian
|
||||||
|
==========
|
||||||
|
|
||||||
|
------------
|
||||||
|
Introduction
|
||||||
|
------------
|
||||||
|
|
||||||
|
TinyEndian is a minimal endianness library for the D programming language. It has no
|
||||||
|
external dependencies, it only needs a D compiler and Phobos (standard library).
|
||||||
|
TinyEndian doesn't allocate memory and is fully **@nogc** to allow use in
|
||||||
|
high-performance code.
|
||||||
|
|
||||||
|
The API is not stable and may change in the future.
|
||||||
|
|
||||||
|
--------
|
||||||
|
Features
|
||||||
|
--------
|
||||||
|
|
||||||
|
* Swap byte order of 2- or 4-byte elements in an array in place.
|
||||||
|
* Read a UTF-8, UTF-16 or UTF-32 buffer, determine its endianness using a UTF
|
||||||
|
byte-order-mark and convert it to system endianness in place.
|
||||||
|
* No external dependencies.
|
||||||
|
* pure, nothrow and @nogc.
|
||||||
|
|
||||||
|
-------------------
|
||||||
|
Directory structure
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
=============== =======================================================================
|
||||||
|
Directory Contents
|
||||||
|
=============== =======================================================================
|
||||||
|
``./`` This README file, utility scripts, TinyEndian sources outside any packages.
|
||||||
|
``./source`` Source code.
|
||||||
|
=============== =======================================================================
|
||||||
|
|
||||||
|
|
||||||
|
-----
|
||||||
|
Usage
|
||||||
|
-----
|
||||||
|
|
||||||
|
Assuming you use `dub <http://code.dlang.org/about>`_, add this line::
|
||||||
|
|
||||||
|
"tinyendian": { "version" : "~>0.1.0" }
|
||||||
|
|
||||||
|
to the ``"dependencies"`` in your project's ``dub.json``.
|
||||||
|
|
||||||
|
If you don't use dub, you can directly copy the ``source/tinyendian.d`` file into your
|
||||||
|
project.
|
||||||
|
|
||||||
|
TinyEndian requires DMD 2.066 or equivalent GDC/LDC.
|
||||||
|
|
||||||
|
Open ``source/tinyendian.d`` to read the API documentation.
|
||||||
|
|
||||||
|
|
||||||
|
-------
|
||||||
|
License
|
||||||
|
-------
|
||||||
|
|
||||||
|
TinyEndian is released under the terms of the
|
||||||
|
`Boost Software License 1.0 <http://www.boost.org/LICENSE_1_0.txt>`_.
|
||||||
|
This license allows you to use the source code in your own projects, open source
|
||||||
|
or proprietary, and to modify it to suit your needs. However, in source
|
||||||
|
distributions, you have to preserve the license headers in the source code and
|
||||||
|
the accompanying license file.
|
||||||
|
|
||||||
|
Full text of the license can be found in file ``LICENSE_1_0.txt`` and is also
|
||||||
|
displayed here::
|
||||||
|
|
||||||
|
Boost Software License - Version 1.0 - August 17th, 2003
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person or organization
|
||||||
|
obtaining a copy of the software and accompanying documentation covered by
|
||||||
|
this license (the "Software") to use, reproduce, display, distribute,
|
||||||
|
execute, and transmit the Software, and to prepare derivative works of the
|
||||||
|
Software, and to permit third-parties to whom the Software is furnished to
|
||||||
|
do so, all subject to the following:
|
||||||
|
|
||||||
|
The copyright notices in the Software and this entire statement, including
|
||||||
|
the above license grant, this restriction and the following disclaimer,
|
||||||
|
must be included in all copies of the Software, in whole or in part, and
|
||||||
|
all derivative works of the Software, unless such copies or derivative
|
||||||
|
works are solely in the form of machine-executable object code generated by
|
||||||
|
a source language processor.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
|
||||||
|
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
|
||||||
|
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
|
||||||
|
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||||
|
DEALINGS IN THE SOFTWARE.
|
||||||
|
|
||||||
|
|
||||||
|
-------
|
||||||
|
Credits
|
||||||
|
-------
|
||||||
|
|
||||||
|
TinyEndian was created by Ferdinand Majerech aka Kiith-Sa kiithsacmp[AT]gmail.com .
|
||||||
|
|
||||||
|
Parts of code based on the ``std.stream`` Phobos module.
|
||||||
|
|
||||||
|
TinyEndian was created using Vim and DMD on Linux Mint as an endianness library for the `D
|
||||||
|
programming language <http://www.dlang.org>`_.
|
16
package.json
Normal file
16
package.json
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
{
|
||||||
|
"name": "tinyendian",
|
||||||
|
"description": "Lightweight endianness handling library",
|
||||||
|
"authors": [ "Ferdinand Majerech" ],
|
||||||
|
"importPaths": ["source"],
|
||||||
|
"license": "Boost 1.0",
|
||||||
|
"homepage": "https://github.com/kiith-sa/tinyendian",
|
||||||
|
"copyright": "Copyright © 2014, Ferdinand Majerech",
|
||||||
|
|
||||||
|
"buildTypes":
|
||||||
|
{
|
||||||
|
"debug": { "buildOptions": ["debugMode", "debugInfoC"] },
|
||||||
|
"release": { "buildOptions": ["releaseMode", "optimize", "inline", "noBoundsCheck"] },
|
||||||
|
"profile": { "buildOptions": ["releaseMode", "optimize", "noBoundsCheck", "debugInfoC"] }
}
|
||||||
|
}
|
170
source/tinyendian.d
Normal file
170
source/tinyendian.d
Normal file
|
@ -0,0 +1,170 @@
|
||||||
|
// Copyright Ferdinand Majerech 2014.
|
||||||
|
// Distributed under the Boost Software License, Version 1.0.
|
||||||
|
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||||
|
// http://www.boost.org/LICENSE_1_0.txt)
|
||||||
|
|
||||||
|
/// A minimal library providing functionality for changing the endianness of data.
|
||||||
|
module tinyendian;
|
||||||
|
|
||||||
|
|
||||||
|
import core.stdc.string;
|
||||||
|
|
||||||
|
import std.algorithm;
|
||||||
|
import std.system;
|
||||||
|
import std.utf;
|
||||||
|
|
||||||
|
|
||||||
|
/// Unicode UTF encodings that can be reported for a buffer.
///
/// Stored in a single byte; the numeric values are spelled out explicitly.
enum UTFEncoding : ubyte
{
    /// UTF-8.
    UTF_8  = 0,
    /// UTF-16.
    UTF_16 = 1,
    /// UTF-32.
    UTF_32 = 2
}
|
||||||
|
|
||||||
|
@system pure nothrow @nogc:
|
||||||
|
|
||||||
|
/// Swap byte order of items in an array in place.
///
/// Every item is handled as a raw sequence of 2 or 4 bytes whose order is reversed.
/// No value conversion takes place, so any item type of a supported size can be used
/// (e.g. $(D wchar), $(D ushort), $(D dchar), $(D uint) or $(D float)).
///
/// Params:
///
/// T     = Item type. Must be either 2 or 4 bytes long.
/// array = Buffer with values to fix byte order of.
void swapByteOrder(T)(T[] array)
    if(T.sizeof == 2 || T.sizeof == 4)
{
    foreach(ref item; array)
    {
        static if(T.sizeof == 2)
        {
            import std.algorithm : swap;

            // A 2-byte item is reversed by exchanging its two bytes.
            auto bytes = cast(ubyte*)&item;
            swap(bytes[0], bytes[1]);
        }
        else static if(T.sizeof == 4)
        {
            import core.bitop : bswap;

            // Reinterpret the item's storage as a uint instead of converting its
            // value: cast(uint)item would truncate a float, and the uint result
            // of bswap does not implicitly convert back to types such as dchar.
            const swapped = bswap(*cast(const(uint)*)&item);
            item = *cast(const(T)*)&swapped;
        }
        else static assert(false, "Unsupported T: " ~ T.stringof);
    }
}
|
||||||
|
|
||||||
|
/// Convert a UTF-8/16/32 encoded buffer to the byte order of the system, in place.
///
/// The encoding is chosen by looking for a UTF byte-order-mark (BOM) at the start of
/// the buffer. Without a BOM the data is treated as UTF-8 (which is compatible with
/// ASCII). A BOM, if present, is removed: the remaining data is moved to the start of
/// the buffer.
///
/// For UTF-16 and UTF-32, trailing bytes that do not form a whole code unit (1 byte
/// for UTF-16, 1-3 bytes for UTF-32) are stripped from the end of the buffer.
///
/// This function does $(B not) validate the UTF data. It only looks at the BOM and
/// works with 1, 2 or 4-byte items.
///
/// Params:
///
/// array = The array with UTF-data.
///
/// Returns:
///
/// A struct with the following members:
///
/// $(D ubyte[] array)            A slice of the input array containing data in correct
///                               byte order, without BOM and in case of UTF-16/UTF-32,
///                               without stripped bytes, if any.
/// $(D UTFEncoding encoding)     Encoding of the result (UTF-8, UTF-16 or UTF-32)
/// $(D std.system.Endian endian) Endianness of the original array. With a UTF-8 BOM
///                               this is the system endianness; without any BOM it
///                               is $(D Endian.init).
/// $(D uint bytesStripped)       Number of bytes stripped from a UTF-16/UTF-32 array,
///                               if any. This is non-zero only if array.length was not
///                               divisible by 2 or 4 for UTF-16 and UTF-32,
///                               respectively.
///
/// Complexity: $(BIGOH array.length)
auto fixUTFByteOrder(ubyte[] array)
{
    // Known BOMs. The numeric values are indices into bomBytes/bomByteOrder.
    enum BOM: ubyte
    {
        UTF_8     = 0,
        UTF_16_LE = 1,
        UTF_16_BE = 2,
        UTF_32_LE = 3,
        UTF_32_BE = 4,
        None      = ubyte.max
    }

    // Both tables are taken from std.stream.
    static immutable ubyte[][5] bomBytes =
    [
        [0xEF, 0xBB, 0xBF],
        [0xFF, 0xFE],
        [0xFE, 0xFF],
        [0xFF, 0xFE, 0x00, 0x00],
        [0x00, 0x00, 0xFE, 0xFF]
    ];
    static immutable Endian[5] bomByteOrder =
    [
        std.system.endian,
        Endian.littleEndian,
        Endian.bigEndian,
        Endian.littleEndian,
        Endian.bigEndian
    ];

    // Members are described in the function documentation.
    struct Result
    {
        ubyte[]     array;
        UTFEncoding encoding;
        Endian      endian;
        uint        bytesStripped = 0;
    }

    // Find the BOM at the start of the buffer; BOM.None means there is none.
    // The loop deliberately keeps the LAST match: the UTF-32LE BOM begins with the
    // bytes of the UTF-16LE BOM, so stopping at the first match would misdetect
    // UTF-32LE data as UTF-16LE.
    BOM detected = BOM.None;
    foreach(index, marker; bomBytes)
    {
        if(array.startsWith(marker))
        {
            detected = cast(BOM)index;
        }
    }

    Result ret;
    if(detected == BOM.None) { ret.endian = Endian.init; }
    else                     { ret.endian = bomByteOrder[detected]; }

    // Length of the BOM in bytes, i.e. the offset at which the UTF data begins.
    size_t bomLength = 0;
    final switch(detected)
    {
        case BOM.None:
            ret.encoding = UTFEncoding.UTF_8;
            break;
        case BOM.UTF_8:
            ret.encoding = UTFEncoding.UTF_8;
            bomLength    = 3;
            break;
        case BOM.UTF_16_LE, BOM.UTF_16_BE:
            ret.encoding      = UTFEncoding.UTF_16;
            bomLength         = 2;
            ret.bytesStripped = array.length % 2;
            break;
        case BOM.UTF_32_LE, BOM.UTF_32_BE:
            ret.encoding      = UTFEncoding.UTF_32;
            bomLength         = 4;
            ret.bytesStripped = array.length % 4;
            break;
    }

    // Drop trailing bytes that do not form a complete code unit.
    array = array[0 .. $ - ret.bytesStripped];

    // With a BOM present, shift the data back so that it begins at array[0].
    if(bomLength > 0)
    {
        const payloadLength = array.length - bomLength;
        core.stdc.string.memmove(array.ptr, array.ptr + bomLength, payloadLength);
        array = array[0 .. payloadLength];
    }

    // array.length is divisible by 2/4 for UTF-16/32 at this point (see above), so
    // the buffer can be reinterpreted as an array of whole code units.
    if(ret.endian != std.system.endian)
    {
        final switch(ret.encoding)
        {
            case UTFEncoding.UTF_8:
                break;
            case UTFEncoding.UTF_16:
                swapByteOrder(cast(wchar[])array);
                break;
            case UTFEncoding.UTF_32:
                swapByteOrder(cast(dchar[])array);
                break;
        }
    }

    ret.array = array;
    return ret;
}
|
Loading…
Reference in a new issue