/**
 * Parser for microformats.
 * Standards: http://microformats.org/wiki/microformats2-parsing
 */
module nl.netsoj.chris.blog.microformats.parser;

import std.exception;
import std.traits;

import vibe.inet.url;
import html;

import nl.netsoj.chris.blog.url;

/// Exception type reserved for microformat parsing failures.
class MicroFormatParseException : Exception
{
    mixin basicExceptionCtors;
}

/**
 * User-defined attribute that marks a field of a model struct as a
 * microformat property and selects how its value is extracted.
 */
struct MicroFormatProperty
{
    /// The kinds of microformat class names, one per class-name prefix.
    enum Type
    {
        RootClass,
        PlainText,
        Url,
        DateTime,
        EmbeddedMarkup
    }

    /// Kind of this property; defaults to plain text.
    Type type = Type.PlainText;

    /**
     * Returns: the class-name prefix belonging to `type`
     * ("h-", "p-", "u-", "dt-" or "e-").
     */
    string getClassPrefix() const pure nothrow @nogc @safe
    {
        final switch (type)
        {
            case Type.RootClass:      return "h-";
            case Type.PlainText:      return "p-";
            case Type.Url:            return "u-";
            case Type.DateTime:       return "dt-";
            case Type.EmbeddedMarkup: return "e-";
        }
    }
}

/**
 * Parses HTML source text and extracts every instance of the microformat
 * model `T` found in it.
 *
 * Params:
 *   source = HTML text of the page
 *   url    = URL of the page, used as the base for resolving URL properties
 *
 * Returns: one `T` per matching root element.
 */
T[] parsePage(T)(string source, URL url)
{
    return parsePage!T(createDocument(source), url);
}

/**
 * Extracts every instance of the microformat model `T` from a parsed document.
 *
 * The root class is "h-" followed by the lower-cased name of `T`. For each
 * root element, every field of `T` annotated with `@MicroFormatProperty` is
 * looked up below that root by the class name made of the property's prefix
 * and the lower-cased field name:
 *
 * $(UL
 *   $(LI `PlainText`: the element's text, if its first child is a text node.)
 *   $(LI `Url`: `href` of an `a` element or `src` of an `img` element,
 *        resolved against `url`.)
 *   $(LI Other kinds are currently ignored.)
 * )
 *
 * A field whose element is missing, or does not match the rules above, keeps
 * its default value.
 *
 * Params:
 *   page = the parsed document to search
 *   url  = URL of the page, used as the base for resolving URL properties
 *
 * Returns: one `T` per matching root element, in the order the query yields them.
 */
T[] parsePage(T)(Document page, URL url)
if (isAggregateType!T)
{
    import std.algorithm : map;
    import std.array : array;
    import std.conv : to;
    import std.string : toLower;

    alias PropType = MicroFormatProperty.Type;

    // __traits(identifier) yields the bare name; .stringof may include
    // template arguments or other decoration.
    immutable rootSelector = ".h-" ~ __traits(identifier, T).toLower;

    return page.querySelectorAll(rootSelector).map!((root) {
        T instance = T();

        // Double braces give every unrolled iteration its own scope, so the
        // per-property locals do not have to live outside the loop.
        static foreach (sym; getSymbolsByUDA!(T, MicroFormatProperty))
        {{
            enum memberName = __traits(identifier, sym);
            const uda = getUDAs!(sym, MicroFormatProperty)[0];
            const propertySelector = "." ~ uda.getClassPrefix() ~ memberName.toLower;

            Node propNode = page.querySelector(propertySelector, root);

            // The property may be absent from this root element; leave the
            // field at its default instead of dereferencing a null node.
            if (propNode)
            {
                switch (uda.type)
                {
                    case PropType.PlainText:
                        if (propNode.firstChild && propNode.firstChild.isTextNode())
                        {
                            __traits(getMember, instance, memberName) = to!string(propNode.text);
                        }
                        break;

                    case PropType.Url:
                        if (propNode.tag == "a" && propNode.hasAttr("href"))
                        {
                            __traits(getMember, instance, memberName) =
                                resolveURL(url, to!string(propNode["href"])).toString;
                        }
                        else if (propNode.tag == "img" && propNode.hasAttr("src"))
                        {
                            __traits(getMember, instance, memberName) =
                                resolveURL(url, to!string(propNode["src"])).toString;
                        }
                        break;

                    default:
                        break;
                }
            }
        }}

        return instance;
    }).array;
}

unittest
{
    struct App
    {
        @MicroFormatProperty(MicroFormatProperty.Type.Url)
        string logo;
        @MicroFormatProperty(MicroFormatProperty.Type.PlainText)
        string name;
    }

    // NOTE(review): the markup of this fixture was reconstructed to match the
    // assertion below; confirm it against the intended test page.
    string page = q"eos
<div class="h-app">
    <img class="u-logo" src="/logo.png" />
    <span class="p-name">Example App</span>
</div>
eos";

    auto ts = parsePage!App(page, URL("https://example.com/"));
    assert(ts.length == 1);
    assert(ts[0] == App("https://example.com:443/logo.png", "Example App"));

    // A root element lacking one of the properties must not crash; the
    // missing field simply keeps its default value.
    string partial = q"eos
<div class="h-app">
    <span class="p-name">Example App</span>
</div>
eos";

    auto ps = parsePage!App(partial, URL("https://example.com/"));
    assert(ps.length == 1);
    assert(ps[0] == App(null, "Example App"));
}