Merge pull request #9 from BBasile/safe

make `@safe`, improve coverage and refresh the style
This commit is contained in:
Cameron Ross 2018-06-10 05:53:42 -03:00 committed by GitHub
commit 4d862448a0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -6,15 +6,7 @@
/// A minimal library providing functionality for changing the endianness of data. /// A minimal library providing functionality for changing the endianness of data.
module tinyendian; module tinyendian;
import std.system : Endian, endian;
import core.stdc.string;
import std.algorithm;
import std.system;
import std.utf;
static if(__VERSION__ < 2066)
private enum nogc;
/// Unicode UTF encodings. /// Unicode UTF encodings.
enum UTFEncoding : ubyte enum UTFEncoding : ubyte
@ -23,8 +15,8 @@ enum UTFEncoding : ubyte
UTF_16, UTF_16,
UTF_32 UTF_32
} }
///
unittest @safe unittest
{ {
const ints = [314, -101]; const ints = [314, -101];
int[2] intsSwapBuffer = ints; int[2] intsSwapBuffer = ints;
@ -39,8 +31,6 @@ unittest
assert(floats == floatsSwapBuffer, "Lost information when swapping byte order"); assert(floats == floatsSwapBuffer, "Lost information when swapping byte order");
} }
@nogc @system pure nothrow:
/** Swap byte order of items in an array in place. /** Swap byte order of items in an array in place.
* *
* Params: * Params:
@ -48,19 +38,20 @@ unittest
* T = Item type. Must be either 2 or 4 bytes long. * T = Item type. Must be either 2 or 4 bytes long.
* array = Buffer with values to fix byte order of. * array = Buffer with values to fix byte order of.
*/ */
void swapByteOrder(T)(T[] array) void swapByteOrder(T)(T[] array) @trusted @nogc pure nothrow
if([2, 4].canFind(T.sizeof)) if (T.sizeof == 2 || T.sizeof == 4)
{ {
import core.bitop;
// Swap the byte order of all read characters. // Swap the byte order of all read characters.
foreach (ref item; array) foreach (ref item; array)
{ {
static if (T.sizeof == 2) static if (T.sizeof == 2)
{ {
import std.algorithm.mutation : swap;
swap(*cast(ubyte*)&item, *(cast(ubyte*)&item + 1)); swap(*cast(ubyte*)&item, *(cast(ubyte*)&item + 1));
} }
else static if (T.sizeof == 4) else static if (T.sizeof == 4)
{ {
import core.bitop : bswap;
const swapped = bswap(*cast(uint*)&item); const swapped = bswap(*cast(uint*)&item);
item = *cast(const(T)*)&swapped; item = *cast(const(T)*)&swapped;
} }
@ -68,6 +59,15 @@ void swapByteOrder(T)(T[] array)
} }
} }
/** Value returned by fixUTFByteOrder.
 *
 * See fixUTFByteOrder for how each field is computed.
 */
struct FixUTFByteOrderResult
{
    /// Slice of the input buffer that fixUTFByteOrder hands back to the caller.
    ubyte[] array;

    /// UTF encoding that fixUTFByteOrder determined for the buffer.
    UTFEncoding encoding;

    /// Byte order associated with the detected BOM; stays at Endian.init when no BOM is found.
    Endian endian;

    /// Number of bytes cut from the end of the buffer (defaults to 0).
    uint bytesStripped;
}
/** Convert byte order of an array encoded in UTF(8/16/32) to system endianness in place. /** Convert byte order of an array encoded in UTF(8/16/32) to system endianness in place.
* *
* Uses the UTF byte-order-mark (BOM) to determine UTF encoding. If there is no BOM * Uses the UTF byte-order-mark (BOM) to determine UTF encoding. If there is no BOM
@ -100,7 +100,7 @@ void swapByteOrder(T)(T[] array)
* *
* Complexity: (BIGOH array.length) * Complexity: (BIGOH array.length)
*/ */
auto fixUTFByteOrder(ubyte[] array) auto fixUTFByteOrder(ubyte[] array) @safe @nogc pure nothrow
{ {
// Enumerates UTF BOMs, matching indices to byteOrderMarks/bomEndian. // Enumerates UTF BOMs, matching indices to byteOrderMarks/bomEndian.
enum BOM: ubyte enum BOM: ubyte
@ -119,30 +119,24 @@ auto fixUTFByteOrder(ubyte[] array)
[0xFE, 0xFF], [0xFE, 0xFF],
[0xFF, 0xFE, 0x00, 0x00], [0xFF, 0xFE, 0x00, 0x00],
[0x00, 0x00, 0xFE, 0xFF] ]; [0x00, 0x00, 0xFE, 0xFF] ];
static immutable Endian[5] bomEndian = [ std.system.endian, static immutable Endian[5] bomEndian = [ endian,
Endian.littleEndian, Endian.littleEndian,
Endian.bigEndian, Endian.bigEndian,
Endian.littleEndian, Endian.littleEndian,
Endian.bigEndian ]; Endian.bigEndian ];
// Documented in function ddoc. // Documented in function ddoc.
struct Result
{ FixUTFByteOrderResult result;
ubyte[] array;
UTFEncoding encoding;
Endian endian;
uint bytesStripped = 0;
}
Result result;
// Detect BOM, if any, in the bytes we've read. -1 means no BOM. // Detect BOM, if any, in the bytes we've read. -1 means no BOM.
// Need the last match: First 2 bytes of UTF-32LE BOM match the UTF-16LE BOM. If we // Need the last match: First 2 bytes of UTF-32LE BOM match the UTF-16LE BOM. If we
// used the first match, UTF-16LE would be detected when we have a UTF-32LE BOM. // used the first match, UTF-16LE would be detected when we have a UTF-32LE BOM.
import std.algorithm.searching : startsWith;
BOM bomId = BOM.None; BOM bomId = BOM.None;
foreach(i, bom; byteOrderMarks) if(array.startsWith(bom)) foreach (i, bom; byteOrderMarks)
{ if (array.startsWith(bom))
bomId = cast(BOM)i; bomId = cast(BOM)i;
}
result.endian = (bomId != BOM.None) ? bomEndian[bomId] : Endian.init; result.endian = (bomId != BOM.None) ? bomEndian[bomId] : Endian.init;
@ -168,21 +162,52 @@ auto fixUTFByteOrder(ubyte[] array)
break; break;
} }
array = array[0 .. $ - result.bytesStripped];
// If there's a BOM, we need to move data back to ensure it starts at array[0] // If there's a BOM, we need to move data back to ensure it starts at array[0]
if (start != 0) if (start != 0)
{ {
core.stdc.string.memmove(array.ptr, array.ptr + start, array.length - start); array = array[start .. $ - result.bytesStripped];
array = array[0 .. $ - start];
} }
// We enforce above that array.length is divisible by 2/4 for UTF-16/32 // We enforce above that array.length is divisible by 2/4 for UTF-16/32
if(std.system.endian != result.endian) if (endian != result.endian)
{ {
if(result.encoding == UTFEncoding.UTF_16) { swapByteOrder(cast(wchar[])array); } if (result.encoding == UTFEncoding.UTF_16)
else if(result.encoding == UTFEncoding.UTF_32) { swapByteOrder(cast(dchar[])array); } swapByteOrder(cast(wchar[])array);
else if (result.encoding == UTFEncoding.UTF_32)
swapByteOrder(cast(dchar[])array);
} }
result.array = array; result.array = array;
return result; return result;
} }
///
@safe unittest
{
    import std.system : Endian, endian;

    // UTF-8 BOM: the three BOM bytes are not part of the returned slice.
    {
        ubyte[] s = [0xEF, 0xBB, 0xBF, 'a'];
        FixUTFByteOrderResult r = fixUTFByteOrder(s);
        assert(r.encoding == UTFEncoding.UTF_8);
        assert(r.array.length == 1);
        assert(r.array == ['a']);
        // A UTF-8 BOM is mapped to the host byte order, so compare against
        // std.system.endian; hard-coding littleEndian fails on big-endian hosts.
        assert(r.endian == endian);
    }
    // No BOM: the data is treated as UTF-8 and returned unchanged.
    {
        ubyte[] s = ['a'];
        FixUTFByteOrderResult r = fixUTFByteOrder(s);
        assert(r.encoding == UTFEncoding.UTF_8);
        assert(r.array.length == 1);
        assert(r.array == ['a']);
        // Without a BOM the field keeps Endian.init, which is bigEndian.
        assert(r.endian == Endian.bigEndian);
    }
    // UTF-16BE BOM followed by a single byte: the trailing 'a' is stripped
    // because it does not form a complete 2-byte code unit.
    {
        ubyte[] s = [0xFE, 0xFF, 'a'];
        FixUTFByteOrderResult r = fixUTFByteOrder(s);
        assert(r.encoding == UTFEncoding.UTF_16);
        assert(r.array.length == 0);
        assert(r.endian == Endian.bigEndian);
    }
}