chris-website/source/nl/netsoj/chris/blog/microformats/parser.d
2021-11-18 19:28:24 +01:00

117 lines
2.7 KiB
D

/**
* Parser for microformats.
* Standards: http://microformats.org/wiki/microformats2-parsing
*/
module nl.netsoj.chris.blog.microformats.parser;
import std.exception;
import std.traits;
import vibe.inet.url;
import html;
import nl.netsoj.chris.blog.url;
/**
 * Exception class named for microformat parse failures.
 *
 * All constructors are generated by the `basicExceptionCtors` mixin template
 * from `std.exception`.
 *
 * NOTE(review): no code visible in this section of the module throws this
 * exception — confirm whether it is used elsewhere.
 */
class MicroFormatParseException : Exception {
mixin basicExceptionCtors;
}
/**
 * User-defined attribute that marks a field of a microformat model struct and
 * records which kind of property the field holds.
 *
 * The kind determines the class-name prefix (`h-`, `p-`, `u-`, `dt-`, `e-`)
 * that is combined with the field name to locate the property in a page.
 */
struct MicroFormatProperty {
    /// The property kinds, one per class-name prefix.
    enum Type {
        RootClass,
        PlainText,
        Url,
        DateTime,
        EmbeddedMarkup
    }

    /// Kind of this property; plain text unless stated otherwise.
    Type type = Type.PlainText;

    /**
     * Returns: the class-name prefix that belongs to `type`, including the
     * trailing dash (for example `"u-"` for `Type.Url`).
     *
     * Declared `const` so it can also be called on `const` and `immutable`
     * attribute values.
     */
    string getClassPrefix() const pure nothrow @safe {
        // `final switch` makes the compiler reject this function whenever a
        // new `Type` member is added without a matching prefix.
        final switch (type) {
            case Type.RootClass:
                return "h-";
            case Type.PlainText:
                return "p-";
            case Type.Url:
                return "u-";
            case Type.DateTime:
                return "dt-";
            case Type.EmbeddedMarkup:
                return "e-";
        }
    }
}
/**
 * Parses the HTML source of a web page and extracts every instance of the
 * microformat model `T` from it.
 *
 * The source is turned into a document with `createDocument` and then handed
 * to the `Document` overload of `parsePage`, which does the actual extraction.
 *
 * Params:
 *   T = aggregate type describing the microformat model
 *   source = HTML markup of the page
 *   url = URL passed on unchanged to the `Document` overload
 *
 * Returns: the result of the `Document` overload for the parsed source
 */
T[] parsePage(T)(string source, URL url)
if (isAggregateType!T) {
    // Same constraint as the Document overload, so an unsuitable `T` is
    // rejected at the call site rather than deep inside this template.
    return parsePage!T(createDocument(source), url);
}
/**
 * Extracts every instance of the microformat model `T` from a parsed page.
 *
 * Each element carrying the class `h-<lower-cased name of T>` yields one `T`.
 * For every field of `T` annotated with `MicroFormatProperty`, the first
 * element below that root matching `.<prefix><lower-cased field name>` is
 * looked up and, depending on the property type, used to fill the field:
 *
 * $(UL
 *   $(LI `PlainText`: the node's text, if its first child is a text node)
 *   $(LI `Url`: the `href` of an `a` element or the `src` of an `img`
 *        element, resolved against `url`)
 *   $(LI any other type: currently ignored)
 * )
 *
 * A field whose property element is absent, or does not have the expected
 * shape, keeps its default value.
 *
 * Params:
 *   T = aggregate type describing the microformat model
 *   page = the parsed document to search
 *   url = base URL used to resolve URL properties
 *
 * Returns: one `T` per matching root element, in the order yielded by
 *          `querySelectorAll`
 */
T[] parsePage(T)(Document page, URL url)
if (isAggregateType!T) {
    import std.algorithm;
    import std.array;
    import std.conv;
    import std.range;
    import std.string;

    string rootClass = "h-" ~ T.stringof.toLower;
    return page.querySelectorAll(".%s".format(rootClass)).map!((node){
        alias PropType = MicroFormatProperty.Type;
        T instance = T();
        MicroFormatProperty uda;
        string propertyClass;
        Node propNode;
        // Unrolled at compile time: one lookup per annotated field of T.
        static foreach(sym; getSymbolsByUDA!(T, MicroFormatProperty)) {
            uda = getUDAs!(sym, MicroFormatProperty)[0];
            propertyClass = uda.getClassPrefix() ~ sym.stringof.toLower;
            propNode = page.querySelector(".%s".format(propertyClass), node);
            // The property may be missing from this root element; without
            // this guard the member accesses below would hit a missing node.
            if (propNode) {
                switch (uda.type) {
                    case PropType.PlainText:
                        if (propNode.firstChild && propNode.firstChild.isTextNode()) {
                            __traits(getMember, instance, sym.stringof) = to!string(propNode.text);
                        }
                        break;
                    case PropType.Url:
                        if (propNode.tag == "a" && propNode.hasAttr("href")) {
                            __traits(getMember, instance, sym.stringof) = resolveURL(url, to!string(propNode["href"])).toString;
                        } else if (propNode.tag == "img" && propNode.hasAttr("src")) {
                            __traits(getMember, instance, sym.stringof) = resolveURL(url, to!string(propNode["src"])).toString;
                        }
                        break;
                    default:
                        // RootClass, DateTime and EmbeddedMarkup are not handled here.
                        break;
                }
            }
        }
        return instance;
    }).array;
}
// Parses a minimal `h-app` fixture and checks that the URL property is
// resolved against the page URL and the plain-text property is extracted.
unittest {
    // The heredoc body and its closing delimiter stay at column 0.
    string page = q"eos
<div class="h-app">
<img src="/logo.png" class="u-logo">
<a href="/" class="u-url p-name">Example App</a>
</div>
eos";

    // Model under test: one URL property and one plain-text property.
    struct App {
        @MicroFormatProperty(MicroFormatProperty.Type.Url)
        string logo;
        @MicroFormatProperty(MicroFormatProperty.Type.PlainText)
        string name;
    }

    auto ts = parsePage!App(page, URL("https://example.com/"));
    // Exactly one root element is present; check before indexing so a
    // regression fails with a clear assertion instead of a range error.
    assert(ts.length == 1);
    assert(ts[0] == App("https://example.com:443/logo.png", "Example App"));
}