diff options
-rw-r--r-- | fallbacks/lxp.lua | 149 |
1 files changed, 149 insertions, 0 deletions
diff --git a/fallbacks/lxp.lua b/fallbacks/lxp.lua new file mode 100644 index 00000000..21cab4af --- /dev/null +++ b/fallbacks/lxp.lua @@ -0,0 +1,149 @@ +
+local coroutine = coroutine;
+local tonumber = tonumber;
+local string = string;
+local setmetatable, getmetatable = setmetatable, getmetatable;
+local pairs = pairs;
+
+-- A coroutine with an empty body that is resumed once here, so it has already
+-- finished. new() swaps it in for a parser's own coroutine after end-of-input
+-- or after an error, so that every later resume fails instead of parsing.
+local deadroutine = coroutine.create(function() end);
+coroutine.resume(deadroutine);
+
+-- NOTE(review): in Lua 5.1, module() without package.seeall replaces this
+-- chunk's environment with the module table, so the code below reaches the
+-- standard library only through the locals captured above, and global
+-- function definitions below (e.g. new) become fields of the module.
+module("lxp")
+
+-- Lookup table used to resolve the text between "&" and ";" in XML.
+-- The five predefined entities are stored directly. Numeric character
+-- references ("#NNN" decimal, "#xHHH" hexadecimal) are resolved on demand by
+-- __index and returned as the UTF-8 encoding of the code point.
+-- Anything else (unknown names, malformed digits, surrogates, code points
+-- above 0x10FFFF) yields nil, which makes string.gsub keep the original text.
+local entity_map = setmetatable({
+	["amp"] = "&";
+	["gt"] = ">";
+	["lt"] = "<";
+	["apos"] = "'";
+	["quot"] = "\"";
+}, {__index = function(_, s)
+		-- Validate the digits with a pattern first so tonumber() never sees
+		-- junk such as "#zz" (which used to make string.char raise).
+		local digits, base = s:match("^#x(%x+)$"), 16;
+		if not digits then
+			digits, base = s:match("^#(%d+)$"), 10;
+		end
+		local cp = digits and tonumber(digits, base);
+		if not cp or cp > 0x10FFFF or (cp >= 0xD800 and cp <= 0xDFFF) then
+			return nil;
+		end
+		if cp < 0x80 then
+			return string.char(cp);
+		end
+		-- Peel off 6-bit continuation groups from the low end. Only % and
+		-- exact division are used because the math library is not in scope.
+		local b4 = cp % 64;
+		cp = (cp - b4) / 64;
+		if cp < 0x20 then -- original code point was below 0x800
+			return string.char(0xC0 + cp, 0x80 + b4);
+		end
+		local b3 = cp % 64;
+		cp = (cp - b3) / 64;
+		if cp < 0x10 then -- original code point was below 0x10000
+			return string.char(0xE0 + cp, 0x80 + b3, 0x80 + b4);
+		end
+		local b2 = cp % 64;
+		cp = (cp - b2) / 64;
+		return string.char(0xF0 + cp, 0x80 + b2, 0x80 + b3, 0x80 + b4);
+	end
+});
+-- Replaces every "&...;" sequence in str with its entity_map entry and
+-- returns the resulting string only (the substitution count is dropped).
+-- Sequences that entity_map does not resolve are left as they are.
+local function xml_unescape(str)
+	local unescaped = str:gsub("&(.-);", entity_map);
+	return unescaped;
+end
+-- Splits the inside of a tag (the text between "<" and ">", without any
+-- trailing "/") into the element name and a table of attributes.
+-- Returns: name, attr
+--   name: first run of non-whitespace characters, or nil if there is none
+--   attr: map of attribute name -> unescaped attribute value
+local function parse_tag(s)
+	local name, sattr = s:match("(%S+)(.*)");
+	local attr = {};
+	if not name then
+		return nil, attr;
+	end
+	-- The value must be closed by the same quote character that opened it
+	-- (%2), so a="it's" and a='say "hi"' are both read in full. Whitespace
+	-- around "=" is tolerated.
+	for key, _, value in sattr:gmatch("([^=%s]+)%s*=%s*(['\"])(.-)%2") do
+		attr[key] = xml_unescape(value);
+	end
+	return name, attr;
+end
+
+-- Body of the streaming parser; new() runs it inside a coroutine.
+--   data         first chunk of input; further chunks arrive as the first
+--                value returned by coroutine.yield()
+--   handlers     table of optional callbacks, each called with handlers as
+--                its first argument: StartElement(name, attr),
+--                EndElement(name), CharacterData(text)
+--   ns_separator string placed between namespace URI and local name; when
+--                nil, names are reported as written and xmlns attributes are
+--                passed through untouched
+-- The function yields whenever it needs more input. It only returns on a
+-- well-formedness error, and then returns the error message.
+-- Comments, processing instructions and other "<!...>" markup are skipped;
+-- CDATA sections are delivered verbatim through CharacterData.
+-- Known limitation: a literal ">" inside an attribute value, processing
+-- instruction or DOCTYPE still ends that piece of markup early.
+local function parser(data, handlers, ns_separator)
+	-- Calls handlers[event](handlers, ...) if that handler was supplied.
+	local function emit(event, ...)
+		local handler = handlers[event];
+		if handler then
+			handler(handlers, ...);
+		end
+	end
+
+	-- Consumes and returns buffered input up to and including the first
+	-- occurrence of the plain string str, yielding for more input as needed.
+	local function read_until(str)
+		local pos = data:find(str, nil, true);
+		while not pos do
+			data = data..coroutine.yield();
+			pos = data:find(str, nil, true);
+		end
+		local r = data:sub(1, pos);
+		data = data:sub(pos+1);
+		return r;
+	end
+	-- Same as read_until, but stops just before str and leaves it buffered.
+	local function read_before(str)
+		local pos = data:find(str, nil, true);
+		while not pos do
+			data = data..coroutine.yield();
+			pos = data:find(str, nil, true);
+		end
+		local r = data:sub(1, pos-1);
+		data = data:sub(pos);
+		return r;
+	end
+	-- Returns the next input character without consuming it.
+	local function peek()
+		while #data == 0 do data = coroutine.yield(); end
+		return data:sub(1,1);
+	end
+	-- elem is markup content (without "<" and ">") that starts with opener.
+	-- Keeps consuming input until elem really ends with closer, so a ">"
+	-- inside the section does not terminate it, then returns the text
+	-- between opener and closer.
+	local function read_section(elem, opener, closer)
+		local min_len = #opener + #closer;
+		while #elem < min_len or elem:sub(-#closer) ~= closer do
+			elem = elem..">"..read_until(">"):sub(1, -2);
+		end
+		return elem:sub(#opener + 1, -#closer - 1);
+	end
+
+	-- Namespace scopes form a chain: each open element gets its own table
+	-- whose metatable (with __index) is the enclosing scope. Key 0 holds the
+	-- element's expanded name, string keys hold prefix -> URI bindings
+	-- ("" is the default namespace).
+	local ns = { xml = "http://www.w3.org/XML/1998/namespace" };
+	ns.__index = ns;
+	-- Expands a possibly prefixed name using the current scope. The default
+	-- namespace is applied only when dodefault is true (element names).
+	local function apply_ns(name, dodefault)
+		if not ns_separator then
+			return name;
+		end
+		local prefix,n = name:match("^([^:]*):(.*)$");
+		if prefix and ns[prefix] then
+			return ns[prefix]..ns_separator..n;
+		end
+		if dodefault and ns[""] then
+			return ns[""]..ns_separator..name;
+		end
+		return name;
+	end
+	-- Opens a scope for a start tag. Returns the expanded tag name and a new
+	-- attribute table holding the expanded attribute names both as array
+	-- entries and as keys mapping to their values.
+	local function push(tag, attr)
+		ns = setmetatable({}, ns);
+		if ns_separator then
+			for k,v in pairs(attr) do
+				local xmlns = k == "xmlns" and "" or k:match("^xmlns:(.*)$");
+				if xmlns then
+					ns[xmlns] = v;
+					attr[k] = nil; -- clearing an existing key during pairs() is allowed
+				end
+			end
+		end
+		local newattr, n = {}, 0;
+		for k,v in pairs(attr) do
+			n = n+1;
+			k = apply_ns(k);
+			newattr[n] = k;
+			newattr[k] = v;
+		end
+		tag = apply_ns(tag, true);
+		ns[0] = tag;
+		ns.__index = ns;
+		return tag, newattr;
+	end
+	-- Closes the innermost scope and returns the name of its element.
+	local function pop()
+		local tag = ns[0];
+		ns = getmetatable(ns);
+		return tag;
+	end
+
+	while true do
+		if peek() == "<" then
+			local elem = read_until(">"):sub(2,-2);
+			local first = elem:sub(1,1);
+			if elem:sub(1,3) == "!--" then -- comment: skip up to the real "-->"
+				read_section(elem, "!--", "--");
+			elseif elem:sub(1,8) == "![CDATA[" then -- CDATA: literal text, no unescaping
+				emit("CharacterData", read_section(elem, "![CDATA[", "]]"));
+			elseif first == "!" or first == "?" then -- neglect declarations and processing-instructions
+			elseif first == "/" then -- end tag
+				-- Resolve the name in the scope of the element being closed
+				-- and make sure it is the one that is actually open. This
+				-- also rejects an end tag with no element open, which would
+				-- otherwise pop past the root scope.
+				local closing = apply_ns(elem:match("^/(%S*)"), true);
+				if ns[0] ~= closing then
+					return "mismatched tag";
+				end
+				emit("EndElement", pop());
+			else -- start tag or empty tag
+				local is_empty = elem:sub(-1,-1) == "/";
+				if is_empty then
+					elem = elem:sub(1,-2);
+				end
+				local name,attr = parse_tag(elem);
+				if not name then
+					return "not well-formed (invalid token)";
+				end
+				name,attr = push(name,attr);
+				emit("StartElement", name, attr);
+				if is_empty then
+					emit("EndElement", pop());
+				end
+			end
+		else
+			-- Text is held back until the next "<" is buffered, so an
+			-- entity split across two chunks is still unescaped as a whole.
+			local text = read_before("<");
+			emit("CharacterData", xml_unescape(text));
+		end
+	end
+end
+
+-- Creates a parser object bound to the given handlers and namespace
+-- separator. The object has a single method:
+--   parse(self, data)
+--     data == nil/false : marks end of input, returns true
+--     otherwise         : feeds data to the parser coroutine; returns true
+--                         on success, or nil plus a message on error
+-- After end of input or an error the object stays unusable, because its
+-- coroutine is replaced by the already finished deadroutine.
+function new(handlers, ns_separator)
+	local thread = coroutine.create(parser);
+	local object = {};
+
+	function object.parse(self, data)
+		if data then
+			-- The second result is only set when the coroutine raised an
+			-- error or returned a value; a plain yield leaves it nil.
+			local _, err = coroutine.resume(thread, data, handlers, ns_separator);
+			if not err then
+				return true; -- success
+			end
+			thread = deadroutine;
+			return nil, err; -- error
+		end
+		thread = deadroutine;
+		return true; -- eof
+	end
+
+	return object;
+end
+
+return _M;
|