From 1f2243190fcd7d9349d5024837d94686c1d8362c Mon Sep 17 00:00:00 2001
From: Ferdinand Majerech
Date: Sat, 22 Oct 2011 00:24:29 +0200
Subject: [PATCH] Added a random YAML generator to serve as an example and for benchmarking.

---
Review note: the payload below was amended after the original commit (see the
comments in yaml_gen.d), so the blob ids on the "index" lines still refer to
the original file contents.

 examples/yaml_gen/Makefile    |   5 +
 examples/yaml_gen/config.yaml |  41 +++++
 examples/yaml_gen/yaml_gen.d  | 379 ++++++++++++++++++++++++++++++++++
 3 files changed, 425 insertions(+)
 create mode 100644 examples/yaml_gen/Makefile
 create mode 100644 examples/yaml_gen/config.yaml
 create mode 100644 examples/yaml_gen/yaml_gen.d

diff --git a/examples/yaml_gen/Makefile b/examples/yaml_gen/Makefile
new file mode 100644
index 0000000..0f19a8d
--- /dev/null
+++ b/examples/yaml_gen/Makefile
@@ -0,0 +1,5 @@
+main:
+	dmd -w -I../../ -L-L../../ -L-ldyaml yaml_gen.d
+
+clean:
+	rm -f yaml_gen yaml_gen.o
diff --git a/examples/yaml_gen/config.yaml b/examples/yaml_gen/config.yaml
new file mode 100644
index 0000000..5f81723
--- /dev/null
+++ b/examples/yaml_gen/config.yaml
@@ -0,0 +1,41 @@
+root-type: seq
+documents: 2
+complex-keys: false
+min-nodes-per-document: 512
+encoding: utf-32
+indent: 4
+text-width: 40
+
+string:
+  probability: 10
+  range: {min: 1, max: 40, dist: cubic}
+int:
+  probability: 10
+  range: {min: -10000000, max: 10000000, dist: linear}
+float:
+  probability: 10
+  range: {min: -10000000.0, max: 10000000.0, dist: linear}
+bool:
+  probability: 10
+timestamp:
+  probability: 10
+  round-chance: 0.9
+  range: {min: 0, max: 1231200000000000000, dist: linear}
+binary:
+  probability: 4
+  range: {min: 1, max: 400, dist: quadratic}
+map:
+  probability: 2
+  range: {min: 1, max: 20, dist: cubic}
+omap:
+  probability: 1
+  range: {min: 1, max: 20, dist: cubic}
+pairs:
+  probability: 1
+  range: {min: 1, max: 20, dist: cubic}
+seq:
+  probability: 2
+  range: {min: 1, max: 20, dist: cubic}
+set:
+  probability: 1
+  range: {min: 1, max: 20, dist: cubic}
diff --git a/examples/yaml_gen/yaml_gen.d b/examples/yaml_gen/yaml_gen.d
new file mode 100644
index 0000000..134fd27
--- /dev/null
+++ b/examples/yaml_gen/yaml_gen.d
@@ -0,0 +1,379 @@
+
+import std.conv;
+import std.datetime;
+import std.math;
+import std.random;
+import std.stdio;
+import std.string;
+import yaml;
+
+
+///Characters a generated string may start with.
+immutable alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-_";
+///Extra characters allowed after the first character of a generated string.
+immutable digits = "0123456789";
+
+///Generator configuration; loaded from the config file by generate().
+Node config;
+///Node generator functions, indexed by the name of the type they generate.
+Node function(bool)[string] generators;
+///Scalar types that can be generated as values.
+auto typesScalar = ["string", "int", "float", "bool", "timestamp", "binary"];
+///Scalar types that can be generated as mapping keys and set members.
+auto typesScalarKey = ["string", "int", "float", "timestamp"];
+///Collection types that can be generated.
+auto typesCollection = ["map","omap", "pairs", "seq", "set"];
+///Root collections keep growing until the document has this many nodes.
+ulong minNodesDocument;
+///Number of nodes generated so far for the current document.
+ulong totalNodes;
+
+///Register a generator function for every supported type.
+static this()
+{
+    generators["string"]    = &genString;
+    generators["int"]       = &genInt;
+    generators["float"]     = &genFloat;
+    generators["bool"]      = &genBool;
+    generators["timestamp"] = &genTimestamp;
+    generators["binary"]    = &genBinary;
+    generators["map"]       = &genMap;
+    generators["omap"]      = &genOmap;
+    generators["pairs"]     = &genPairs;
+    generators["seq"]       = &genSeq;
+    generators["set"]       = &genSet;
+}
+
+/**
+ * Get a random number in range [0, 1] with specified distribution.
+ *
+ * Params:  distribution = "linear", "quadratic" or "cubic". Any other value
+ *                         prints a warning and is treated as "linear".
+ */
+real randomNormalized(in string distribution = "linear")
+{
+    //Draw from the shared thread-local generator instead of constructing
+    //and seeding a new one on every call.
+    const r = uniform!"[]"(0.0L, 1.0L);
+    switch(distribution)
+    {
+        case "linear":
+            return r;
+        case "quadratic":
+            return r * r;
+        case "cubic":
+            return r * r * r;
+        default:
+            writeln("Unknown random distribution: ", distribution,
+                    ", falling back to linear");
+            return r;
+    }
+}
+
+///Get a random integer in range [min, max] with specified distribution.
+long randomLong(in long min, in long max, in string distribution = "linear")
+{
+    return min + cast(long)round((max - min) * randomNormalized(distribution));
+}
+
+///Get a random real in range [min, max] with specified distribution.
+real randomReal(in real min, in real max, in string distribution = "linear")
+{
+    return min + (max - min) * randomNormalized(distribution);
+}
+
+///Get a random character from chars, which must not be empty.
+char randomChar(in string chars)
+{
+    return chars[randomLong(0, chars.length - 1)];
+}
+
+///Pick a random type from types, weighted by each type's "probability" in config.
+string randomType(string[] types)
+{
+    auto probabilities = new uint[types.length];
+    foreach(index, type; types)
+    {
+        probabilities[index] = config[type]["probability"].get!uint;
+    }
+    return types[dice(probabilities)];
+}
+
+/**
+ * Generate a random string node.
+ *
+ * The first character is never a digit. The length is drawn from the range
+ * configured for "string"; a length of zero produces an empty string.
+ */
+Node genString(bool root = false)
+{
+    auto range = config["string"]["range"];
+
+    const chars = randomLong(range["min"].get!uint, range["max"].get!uint,
+                             range["dist"].get!string);
+
+    //Built once rather than concatenated again for every character.
+    immutable alnum = alphabet ~ digits;
+
+    char[] result = new char[chars];
+    foreach(i, ref c; result)
+    {
+        //Leading digits are avoided, presumably so the string is not
+        //mistaken for a number.
+        c = randomChar(i == 0 ? alphabet : alnum);
+    }
+
+    return Node(cast(string)result);
+}
+
+///Generate a random integer node from the range configured for "int".
+Node genInt(bool root = false)
+{
+    auto range = config["int"]["range"];
+
+    const result = randomLong(range["min"].get!int, range["max"].get!int,
+                              range["dist"].get!string);
+
+    return Node(result);
+}
+
+///Generate a random floating point node from the range configured for "float".
+Node genFloat(bool root = false)
+{
+    auto range = config["float"]["range"];
+
+    const result = randomReal(range["min"].get!real, range["max"].get!real,
+                              range["dist"].get!string);
+
+    return Node(result);
+}
+
+///Generate a random boolean node.
+Node genBool(bool root = false)
+{
+    return Node([true, false][randomLong(0, 1)]);
+}
+
+/**
+ * Generate a random timestamp node.
+ *
+ * The configured range is in hnsecs (units of 100ns). With probability
+ * "round-chance" the timestamp is rounded down to a whole second.
+ */
+Node genTimestamp(bool root = false)
+{
+    auto range = config["timestamp"]["range"];
+
+    auto hnsecs = randomLong(range["min"].get!ulong, range["max"].get!ulong,
+                             range["dist"].get!string);
+
+    if(randomNormalized() <= config["timestamp"]["round-chance"].get!real)
+    {
+        //10_000_000 hnsecs make one second.
+        hnsecs -= hnsecs % 10000000;
+    }
+
+    return Node(SysTime(hnsecs));
+}
+
+///Generate a node of random bytes; its length is drawn from the "binary" range.
+Node genBinary(bool root = false)
+{
+    auto range = config["binary"]["range"];
+
+    const bytes = randomLong(range["min"].get!uint, range["max"].get!uint,
+                             range["dist"].get!string);
+
+    ubyte[] result = new ubyte[bytes];
+    foreach(i; 0 .. bytes)
+    {
+        result[i] = cast(ubyte)randomLong(0, 255);
+    }
+
+    return Node(result);
+}
+
+/**
+ * Generate a sequence-like collection of random nodes.
+ *
+ * Params:  root  = Is this the root node of a document? If so, elements are
+ *                  added until the document has minNodesDocument nodes and
+ *                  range is ignored.
+ *          range = Config node with "min", "max" and "dist" of the element count.
+ *          tag   = YAML tag of the generated node.
+ *          set   = Only generate scalars that can be used as keys?
+ */
+Node nodes(in bool root, Node range, in string tag, in bool set = false)
+{
+    auto types = typesCollection ~ (set ? typesScalarKey : typesScalar);
+
+    Node[] nodes;
+    if(root)
+    {
+        while(totalNodes < minNodesDocument)
+        {
+            nodes ~= generateNode(randomType(types));
+        }
+    }
+    else
+    {
+        const elems = randomLong(range["min"].get!uint, range["max"].get!uint,
+                                 range["dist"].get!string);
+
+        nodes = new Node[elems];
+        foreach(i; 0 .. elems)
+        {
+            nodes[i] = generateNode(randomType(types));
+        }
+    }
+
+    return Node(nodes, tag);
+}
+
+///Generate a random sequence node.
+Node genSeq(bool root = false)
+{
+    return nodes(root, config["seq"]["range"], "tag:yaml.org,2002:seq");
+}
+
+///Generate a random set node.
+Node genSet(bool root = false)
+{
+    //Sized by the "set" range; this used to read the "seq" range by mistake.
+    return nodes(root, config["set"]["range"], "tag:yaml.org,2002:set", true);
+}
+
+/**
+ * Generate a mapping-like collection of random key-value pairs.
+ *
+ * Params:  root    = Is this the root node of a document? If so, pairs are
+ *                    added until the document has minNodesDocument nodes and
+ *                    range is ignored.
+ *          complex = Allow collections as keys?
+ *          range   = Config node with "min", "max" and "dist" of the pair count.
+ *          tag     = YAML tag of the generated node.
+ */
+Node pairs(bool root, bool complex, Node range, string tag)
+{
+    //Type lists are the same for every pair, so build them only once.
+    auto keyTypes   = typesScalarKey ~ (complex ? typesCollection : []);
+    auto valueTypes = typesScalar ~ typesCollection;
+
+    Node[] keys, values;
+
+    if(root)
+    {
+        while(totalNodes < minNodesDocument)
+        {
+            keys   ~= generateNode(randomType(keyTypes));
+            values ~= generateNode(randomType(valueTypes));
+        }
+    }
+    else
+    {
+        const count = randomLong(range["min"].get!uint, range["max"].get!uint,
+                                 range["dist"].get!string);
+
+        keys   = new Node[count];
+        values = new Node[count];
+        foreach(i; 0 .. count)
+        {
+            keys[i]   = generateNode(randomType(keyTypes));
+            values[i] = generateNode(randomType(valueTypes));
+        }
+    }
+
+    return Node(keys, values, tag);
+}
+
+///Generate a random mapping node.
+Node genMap(bool root = false)
+{
+    Node range = config["map"]["range"];
+    const complex = config["complex-keys"].get!bool;
+
+    return pairs(root, complex, range, "tag:yaml.org,2002:map");
+}
+
+///Generate a random ordered mapping node.
+Node genOmap(bool root = false)
+{
+    Node range = config["omap"]["range"];
+    const complex = config["complex-keys"].get!bool;
+
+    return pairs(root, complex, range, "tag:yaml.org,2002:omap");
+}
+
+///Generate a random pairs node.
+Node genPairs(bool root = false)
+{
+    Node range = config["pairs"]["range"];
+    const complex = config["complex-keys"].get!bool;
+
+    return pairs(root, complex, range, "tag:yaml.org,2002:pairs");
+}
+
+///Generate a node of specified type and count it towards the current document.
+Node generateNode(in string type, bool root = false)
+{
+    ++totalNodes;
+    return generators[type](root);
+}
+
+/**
+ * Load the config file and generate the documents it describes.
+ *
+ * Returns: Root nodes of the generated documents.
+ */
+Node[] generate(in string configFileName)
+{
+    config = Loader(configFileName).load();
+
+    minNodesDocument = config["min-nodes-per-document"].get!long;
+
+    Node[] result;
+    foreach(i; 0 .. config["documents"].get!uint)
+    {
+        result ~= generateNode(config["root-type"].get!string, true);
+        //The node count is per document.
+        totalNodes = 0;
+    }
+
+    return result;
+}
+
+
+///Parse command line arguments, then generate and dump the YAML file.
+void main(string[] args)
+{
+    //Help message.
+    if(args.length == 1)
+    {
+        writeln("Usage: yaml_gen FILE [CONFIG_FILE]\n");
+        writeln("Generates a random YAML file and writes it to FILE.");
+        writeln("If provided, CONFIG_FILE overrides the default config file.");
+        return;
+    }
+
+    string configFile = args.length >= 3 ? args[2] : "config.yaml";
+
+    try
+    {
+        //Generate and dump the nodes.
+        Node[] generated = generate(configFile);
+
+        auto dumper = Dumper(args[1]);
+        auto encoding = config["encoding"];
+        dumper.encoding = encoding == "utf-16" ? Encoding.UTF_16:
+                          encoding == "utf-32" ? Encoding.UTF_32:
+                                                 Encoding.UTF_8;
+
+        dumper.indent = config["indent"].get!uint;
+        dumper.textWidth = config["text-width"].get!uint;
+        dumper.dump(generated);
+    }
+    catch(YAMLException e)
+    {
+        writeln("ERROR: ", e.msg);
+    }
+}