-- plugins/storage/xmlparse.lib.lua
-- Provenance: commit 7af782ea9c6f1818e2e445b0695398ed7df42480
--   Author:  Waqas Hussain
--   Date:    Fri, 19 Feb 2010 22:32:28 +0500
--   Subject: mod_storage, plus a bit of SQL and XML.
--
-- A minimal, pattern-based XML parser that builds a util.stanza tree.
-- Known limitations (unchanged from the original implementation):
--   * A '>' inside an attribute value or a comment terminates the tag early,
--     and CDATA sections are not handled.
--   * End tags are not checked against the name of the open start tag.

local st = require "util.stanza";

-- XML parser
local parse_xml = (function()
	local s_char = string.char;
	local floor = math.floor;

	-- Encode a Unicode code point as a UTF-8 byte string.
	-- Returns nil for values that cannot be encoded (non-numbers, negative or
	-- fractional values, surrogates, anything above U+10FFFF).
	local function utf8_char(cp)
		if not cp or cp < 0 or cp > 0x10FFFF or cp ~= floor(cp) then
			return nil;
		elseif cp >= 0xD800 and cp <= 0xDFFF then
			return nil; -- surrogate halves are not valid scalar values
		elseif cp < 0x80 then
			return s_char(cp);
		elseif cp < 0x800 then
			return s_char(0xC0 + floor(cp / 0x40), 0x80 + cp % 0x40);
		elseif cp < 0x10000 then
			return s_char(
				0xE0 + floor(cp / 0x1000),
				0x80 + floor(cp / 0x40) % 0x40,
				0x80 + cp % 0x40);
		else
			return s_char(
				0xF0 + floor(cp / 0x40000),
				0x80 + floor(cp / 0x1000) % 0x40,
				0x80 + floor(cp / 0x40) % 0x40,
				0x80 + cp % 0x40);
		end
	end

	-- Maps an entity name (the text between '&' and ';') to its replacement.
	-- Named entities are stored directly; numeric character references are
	-- resolved on demand by __index. A nil result makes string.gsub keep the
	-- original "&...;" text, so unknown or malformed references pass through
	-- untouched instead of raising an error.
	local entity_map = setmetatable({
		["amp"] = "&";
		["gt"] = ">";
		["lt"] = "<";
		["apos"] = "'";
		["quot"] = "\"";
	}, {
		__index = function(_, s)
			local hex = s:match("^#x(%x+)$");
			if hex then
				return utf8_char(tonumber(hex, 16));
			end
			local dec = s:match("^#(%d+)$");
			if dec then
				return utf8_char(tonumber(dec, 10));
			end
			return nil;
		end
	});

	-- Replace the entity and character references in str with their values.
	-- The capture excludes '&' and ';' so that a stray '&' earlier in the
	-- text cannot swallow a real entity that follows it.
	local function xml_unescape(str)
		return (str:gsub("&([^&;]+);", entity_map));
	end

	-- Split the inside of a tag ("name a='1' b=\"2\"") into the element name
	-- and a table of unescaped attributes.
	-- Returns nil when s contains no name (e.g. an empty "<>" tag).
	local function parse_tag(s)
		local name, sattr = s:match("^%s*(%S+)(.*)$");
		if not name then
			return nil;
		end
		local attr = {};
		-- %2 forces the closing quote to be the same character as the opening
		-- one, so a value may contain the other kind of quote.
		for key, _, value in sattr:gmatch("([^=%s]+)%s*=%s*([\"'])(.-)%2") do
			attr[key] = xml_unescape(value);
		end
		return name, attr;
	end

	-- Parse an XML document held in the string `xml`.
	-- Returns the first child tag of an internal "root" stanza (i.e. the
	-- document element), or nil if no element was found.
	return function(xml)
		local stanza = st.stanza("root");
		-- Each iteration yields one tag body and the text run that follows it.
		for elem, text in xml:gmatch("<([^>]*)>([^<]*)") do
			local first = elem:sub(1, 1);
			if first == "!" or first == "?" then
				-- neglect comments and processing-instructions
			elseif first == "/" then -- end tag
				stanza:up(); -- TODO check for start-end tag name match
			elseif elem:sub(-1, -1) == "/" then -- empty tag
				local name, attr = parse_tag(elem:sub(1, -2));
				if name then
					stanza:tag(name, attr):up();
				end
			else -- start tag
				local name, attr = parse_tag(elem);
				if name then
					stanza:tag(name, attr);
				end
			end
			if #text ~= 0 then -- text
				stanza:text(xml_unescape(text));
			end
		end
		return stanza.tags[1];
	end
end)();
-- end of XML parser

return parse_xml;