117 lines
2.7 KiB
D
117 lines
2.7 KiB
D
/**
|
|
* Parser for microformats.
|
|
* Standards: http://microformats.org/wiki/microformats2-parsing
|
|
*/
|
|
module nl.netsoj.chris.blog.microformats.parser;
|
|
|
|
import std.exception;
|
|
import std.traits;
|
|
|
|
import vibe.inet.url;
|
|
|
|
import html;
|
|
|
|
import nl.netsoj.chris.blog.url;
|
|
|
|
/**
 * Exception type for failures while parsing microformats.
 *
 * The constructors are the two standard ones generated by
 * `std.exception.basicExceptionCtors`: `(msg, file, line, next)` and
 * `(msg, next, file, line)`.
 */
class MicroFormatParseException : Exception
{
    mixin basicExceptionCtors;
}
|
|
|
|
|
|
/**
 * User-defined attribute that marks a field of a model struct as a
 * microformat property and records which kind of property it is.
 */
struct MicroFormatProperty {
    /// The kinds of microformat property; each maps to a class-name prefix.
    enum Type {
        RootClass,
        PlainText,
        Url,
        DateTime,
        EmbeddedMarkup
    }

    /// Kind of this property; defaults to plain text.
    Type type = Type.PlainText;

    /**
     * Returns the class-name prefix that belongs to `type`
     * ("h-", "p-", "u-", "dt-" or "e-").
     *
     * Declared `const` because it only reads `type`; this allows it to be
     * called on const and immutable instances as well as mutable ones.
     */
    string getClassPrefix() const pure {
        final switch (type) {
            case Type.RootClass:
                return "h-";
            case Type.PlainText:
                return "p-";
            case Type.Url:
                return "u-";
            case Type.DateTime:
                return "dt-";
            case Type.EmbeddedMarkup:
                return "e-";
        }
    }
}
|
|
|
|
/**
 * Parses the HTML source of a web page and extracts every instance of the
 * microformat model `T` found in it.
 *
 * The source is turned into a document with `createDocument` and then handed
 * to the `Document` overload of `parsePage`.
 *
 * Params:
 *   source = HTML source of the page
 *   url    = URL passed through to the `Document` overload
 *
 * Returns: the array produced by the `Document` overload.
 */
T[] parsePage(T)(string source, URL url)
if (isAggregateType!T) {
    // Same constraint as the Document overload, so an unsuitable T is
    // rejected at the call site instead of inside this body.
    return parsePage!T(createDocument(source), url);
}
|
|
|
|
/**
 * Extracts every instance of the microformat model `T` from a parsed page.
 *
 * The root class searched for is "h-" followed by the lower-cased name of
 * `T`. For each matching element a `T` is created, and every field of `T`
 * annotated with `@MicroFormatProperty` is looked up below that element by
 * the class "<prefix><lower-cased field name>":
 *
 * $(UL
 *   $(LI `PlainText`: the node's text is assigned when its first child is a
 *        text node.)
 *   $(LI `Url`: the `href` of an `a` element or the `src` of an `img`
 *        element is resolved against `url` and assigned as a string.)
 *   $(LI Other property types are currently ignored.)
 * )
 *
 * Fields whose property element is not present keep their default value.
 *
 * Params:
 *   page = the parsed document to search
 *   url  = URL that `href`/`src` values are resolved against
 *
 * Returns: one `T` per element carrying the root class, in query order.
 */
T[] parsePage(T)(Document page, URL url)
if (isAggregateType!T) {
    import std.algorithm;
    import std.array;
    import std.conv;
    import std.range;
    import std.string;

    string rootClass = "h-" ~ T.stringof.toLower;

    return page.querySelectorAll(".%s".format(rootClass)).map!((node) {
        alias PropType = MicroFormatProperty.Type;
        T instance = T();
        MicroFormatProperty uda;
        string propertyClass;
        Node propNode;
        // Unrolled at compile time: one copy of the body per annotated field.
        static foreach (sym; getSymbolsByUDA!(T, MicroFormatProperty)) {
            uda = getUDAs!(sym, MicroFormatProperty)[0];
            // __traits(identifier, ...) yields the bare field name.
            propertyClass = uda.getClassPrefix() ~ __traits(identifier, sym).toLower;

            // Search only below the current root element.
            propNode = page.querySelector(".%s".format(propertyClass), node);
            // The property may be absent from the markup; skip it rather
            // than dereferencing a null node.
            if (propNode !is null) {
                switch (uda.type) {
                    case PropType.PlainText:
                        if (propNode.firstChild && propNode.firstChild.isTextNode()) {
                            __traits(getMember, instance, __traits(identifier, sym)) = to!string(propNode.text);
                        }
                        break;
                    case PropType.Url:
                        if (propNode.tag == "a" && propNode.hasAttr("href")) {
                            __traits(getMember, instance, __traits(identifier, sym)) = resolveURL(url, to!string(propNode["href"])).toString;
                        } else if (propNode.tag == "img" && propNode.hasAttr("src")) {
                            __traits(getMember, instance, __traits(identifier, sym)) = resolveURL(url, to!string(propNode["src"])).toString;
                        }
                        break;
                    default:
                        break;
                }
            }
        }

        return instance;
    }).array;
}
|
|
|
|
// Parses a small h-app fixture and checks that the u-logo URL is resolved
// against the page URL and that the p-name text is extracted.
unittest {
    // The closing delimiter of a delimited string must start its line.
    string page = q"eos
<div class="h-app">
<img src="/logo.png" class="u-logo">
<a href="/" class="u-url p-name">Example App</a>
</div>
eos";

    struct App {
        @MicroFormatProperty(MicroFormatProperty.Type.Url)
        string logo;
        @MicroFormatProperty(MicroFormatProperty.Type.PlainText)
        string name;
    }

    auto ts = parsePage!App(page, URL("https://example.com/"));
    // Check the count first so a failed parse reports an assertion failure
    // instead of an out-of-bounds index.
    assert(ts.length == 1);
    assert(ts[0] == App("https://example.com:443/logo.png", "Example App"));
}
|