Fixed a bug in FastCharSearch.

Reader buffer is now manually allocated, decreasing GC workload
even further.
This commit is contained in:
Ferdinand Majerech 2011-10-25 00:16:43 +02:00
parent 97bdf819fa
commit 8b995e5061
3 changed files with 38 additions and 22 deletions

View file

@ -81,22 +81,16 @@ string searchCode(dstring chars, uint tableSize)()
if(tableSize) if(tableSize)
{ {
code ~= specialChars.length code ~= " if(c < " ~ tableSizeStr ~ ")\n"
?
" if(c < " ~ tableSizeStr ~ ")\n"
" {\n" " {\n"
" return cast(bool)table_[c];\n" " return cast(bool)table_[c];\n"
" }\n" " }\n";
:
" return cast(bool)table_[c];\n";
}
if(specialChars.length)
{
code ~= " return " ~ specialCharsCode() ~ ";\n";
} }
code ~= " assert(false);\n" code ~= specialChars.length
"}\n"; ? " return " ~ specialCharsCode() ~ ";\n"
: " return false";
code ~= "}\n";
return code; return code;
} }

View file

@ -40,6 +40,8 @@ final class Reader
private: private:
///Input stream. ///Input stream.
EndianStream stream_; EndianStream stream_;
///Allocated space for buffer_.
dchar[] bufferAllocated_;
///Buffer of currently loaded characters. ///Buffer of currently loaded characters.
dchar[] buffer_; dchar[] buffer_;
///Current position within buffer. Only data after this position can be read. ///Current position within buffer. Only data after this position can be read.
@ -123,13 +125,15 @@ final class Reader
default: assert(false, "Unknown UTF BOM"); default: assert(false, "Unknown UTF BOM");
} }
available_ = stream_.available; available_ = stream_.available;
bufferReserve(256);
} }
///Destroy the Reader. ///Destroy the Reader.
~this() ~this()
{ {
clear(buffer_); core.stdc.stdlib.free(bufferAllocated_.ptr);
buffer_ = null; buffer_ = bufferAllocated_ = null;
} }
/** /**
@ -158,6 +162,9 @@ final class Reader
/** /**
* Get specified number of characters starting at current position. * Get specified number of characters starting at current position.
* *
* Note: This gets only a "view" into the internal buffer,
* which WILL get invalidated after other Reader calls.
*
* Params: length = Number of characters to get. * Params: length = Number of characters to get.
* *
* Returns: Characters starting at current position. * Returns: Characters starting at current position.
@ -172,7 +179,7 @@ final class Reader
const end = min(buffer_.length, bufferOffset_ + length); const end = min(buffer_.length, bufferOffset_ + length);
//need to duplicate as we change buffer content with C functions //need to duplicate as we change buffer content with C functions
//and could end up with returned string referencing changed data //and could end up with returned string referencing changed data
return cast(dstring)buffer_[bufferOffset_ .. end].dup; return cast(dstring)buffer_[bufferOffset_ .. end];
} }
/** /**
@ -202,9 +209,9 @@ final class Reader
*/ */
dstring get(in size_t length) dstring get(in size_t length)
{ {
dstring result = prefix(length); auto result = prefix(length).dup;
forward(length); forward(length);
return result; return cast(dstring)result;
} }
/** /**
@ -273,7 +280,7 @@ final class Reader
size_t bufferLength = buffer_.length - bufferOffset_; size_t bufferLength = buffer_.length - bufferOffset_;
memmove(buffer_.ptr, buffer_.ptr + bufferOffset_, memmove(buffer_.ptr, buffer_.ptr + bufferOffset_,
bufferLength * dchar.sizeof); bufferLength * dchar.sizeof);
buffer_.length = bufferLength; buffer_ = buffer_[0 .. bufferLength];
bufferOffset_ = 0; bufferOffset_ = 0;
} }
@ -286,7 +293,9 @@ final class Reader
{ {
if(buffer_.length == 0 || buffer_[$ - 1] != '\0') if(buffer_.length == 0 || buffer_[$ - 1] != '\0')
{ {
buffer_ ~= '\0'; bufferReserve(buffer_.length + 1);
buffer_ = bufferAllocated_[0 .. buffer_.length + 1];
buffer_[$ - 1] = '\0';
} }
break; break;
} }
@ -380,10 +389,12 @@ final class Reader
const oldPosition = stream_.position; const oldPosition = stream_.position;
//Preallocating memory to limit GC reallocations. //Preallocating memory to limit GC reallocations.
buffer_.length = buffer_.length + chars;
bufferReserve(buffer_.length + chars);
buffer_ = bufferAllocated_[0 .. buffer_.length + chars];
scope(exit) scope(exit)
{ {
buffer_.length = buffer_.length - chars; buffer_ = buffer_[0 .. $ - chars];
enforce(printable(buffer_[oldLength .. $]), enforce(printable(buffer_[oldLength .. $]),
new ReaderException("Special unicode characters are not allowed")); new ReaderException("Special unicode characters are not allowed"));
} }
@ -436,6 +447,17 @@ final class Reader
encoding_ == Encoding.UTF_32)); encoding_ == Encoding.UTF_32));
} }
/**
 * Ensure there is space for at least capacity characters in bufferAllocated_.
 *
 * Does nothing when the current allocation is already large enough.
 * Otherwise the allocation is grown with C realloc (which may move it),
 * and buffer_ is re-sliced so it keeps its length but refers to the
 * new memory.
 *
 * Params:  capacity = Minimum number of characters bufferAllocated_ must hold.
 *
 * Throws:  ReaderException if the allocation could not be grown.
 */
void bufferReserve(in size_t capacity)
{
    if(bufferAllocated_.length >= capacity){return;}
    auto newPtr = core.stdc.stdlib.realloc(bufferAllocated_.ptr,
                                           capacity * dchar.sizeof);
    //capacity is nonzero here, so a null result means the allocation failed.
    //realloc leaves the old block untouched in that case; bail out before
    //slicing the null pointer so bufferAllocated_/buffer_ stay valid and the
    //old block can still be freed.
    enforce(newPtr !is null,
            new ReaderException("Out of memory while growing the reader buffer"));
    bufferAllocated_ = (cast(dchar*)newPtr)[0 .. capacity];
    //The block may have moved; point buffer_ at the same range in the new one.
    buffer_ = bufferAllocated_[0 .. buffer_.length];
}
unittest unittest
{ {
writeln("D:YAML reader endian unittest"); writeln("D:YAML reader endian unittest");

View file

@ -1363,7 +1363,7 @@ final class Scanner
{ {
//Instead of checking indentation, we check for document separators. //Instead of checking indentation, we check for document separators.
const prefix = reader_.prefix(3); const prefix = reader_.prefix(3);
if((prefix == "---" || prefix == "...") && if((prefix == "---"d || prefix == "..."d) &&
" \t\0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek(3))) " \t\0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek(3)))
{ {
throw new Error("While scanning a quoted scalar", startMark, throw new Error("While scanning a quoted scalar", startMark,