1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
|
-- Capture every global this file needs as a local *before* module() runs:
-- module() replaces the chunk's environment with the module table, so plain
-- global lookups (string, pairs, ...) are no longer available afterwards.
local coroutine = coroutine;
local tonumber = tonumber;
local string = string;
local setmetatable, getmetatable = setmetatable, getmetatable;
local pairs = pairs;
-- A coroutine that has already run to completion. The parser object swaps
-- its live coroutine for this one after end of input or an error, so any
-- later resume fails instead of continuing to parse.
local deadroutine = coroutine.create(function() end);
coroutine.resume(deadroutine);
-- NOTE(review): module() is deprecated from Lua 5.2 onwards; this file
-- appears to target Lua 5.1.
module("lxp")
-- Encode a Unicode code point (0 .. 0x10FFFF) as a UTF-8 byte string.
-- Plain arithmetic is used instead of math.floor / bit operators so that the
-- helper only depends on the `string` library.
local function utf8_encode(code)
	if code < 0x80 then
		return string.char(code);
	elseif code < 0x800 then
		local b2 = code % 0x40;
		return string.char(0xC0 + (code - b2) / 0x40, 0x80 + b2);
	elseif code < 0x10000 then
		local b3 = code % 0x40;
		local rest = (code - b3) / 0x40;
		local b2 = rest % 0x40;
		return string.char(0xE0 + (rest - b2) / 0x40, 0x80 + b2, 0x80 + b3);
	else
		local b4 = code % 0x40;
		local rest = (code - b4) / 0x40;
		local b3 = rest % 0x40;
		rest = (rest - b3) / 0x40;
		local b2 = rest % 0x40;
		return string.char(0xF0 + (rest - b2) / 0x40, 0x80 + b2, 0x80 + b3, 0x80 + b4);
	end
end
-- Lookup table mapping the text between "&" and ";" to its replacement.
-- The five predefined XML entities are stored directly. Numeric character
-- references ("#65", "#x41") are resolved by the __index fallback and
-- returned UTF-8 encoded. Anything else (unknown names, malformed digits,
-- surrogates, values above 0x10FFFF) yields nil, which makes string.gsub
-- keep the original text instead of raising an error.
local entity_map = setmetatable({
	["amp"] = "&";
	["gt"] = ">";
	["lt"] = "<";
	["apos"] = "'";
	["quot"] = "\"";
}, {__index = function(_, s)
	-- Validate the digits with a pattern first so tonumber() never sees
	-- signs, exponents, whitespace or other junk.
	local digits, base = s:match("^#x(%x+)$"), 16;
	if not digits then
		digits, base = s:match("^#(%d+)$"), 10;
	end
	local code = digits and tonumber(digits, base);
	if not code or code > 0x10FFFF or (code >= 0xD800 and code <= 0xDFFF) then
		return nil;
	end
	return utf8_encode(code);
end
});
-- Replace XML entity and character references in `str` using entity_map.
-- References that entity_map does not resolve are left in the text verbatim.
-- @param str  string that may contain "&name;" / "&#n;" / "&#xh;" references
-- @return the unescaped string (gsub's match count is dropped by the parens)
local function xml_unescape(str)
	-- The capture must not run across another "&" or ";". With a lazy ".-"
	-- a stray ampersand ("AT&T &lt; x") captured "T &lt", which matched no
	-- entity and so left the valid "&lt;" after it unexpanded.
	return (str:gsub("&([^&;]+);", entity_map));
end
-- Split the inside of a tag (the text between "<" and ">", without a
-- trailing "/") into the element name and a table of attributes.
-- @param s  tag content, e.g. [[item id="1" title='x']]
-- @return name  first run of non-whitespace characters, or nil if `s`
--               contains none
-- @return attr  table mapping attribute name -> unescaped attribute value
local function parse_tag(s)
	local name, sattr = s:match("([^%s]+)(.*)");
	local attr = {};
	-- The value ends at the same quote character that opened it (%2), so a
	-- double-quoted value may contain "'" and vice versa. Whitespace around
	-- "=" is accepted. `sattr` is nil when `s` had no name at all.
	for key, _, value in (sattr or ""):gmatch("([^=%s]+)%s*=%s*(['\"])(.-)%2") do
		attr[key] = xml_unescape(value);
	end
	return name, attr;
end
-- Coroutine body that incrementally tokenises XML text and drives `handlers`.
--
-- It is meant to run inside a coroutine: whenever the buffered input runs
-- out it yields, and expects the next chunk of text as the first value
-- passed to the following resume.
--
-- @param data          first chunk of XML text
-- @param handlers      object receiving handlers:StartElement(name, attr),
--                      handlers:EndElement(name), handlers:CharacterData(text)
-- @param ns_separator  string inserted between a namespace URI and the local
--                      name in reported names. When nil, namespace
--                      processing is off: names are reported as written and
--                      xmlns attributes are passed through like any other.
-- @return an error message string when the input is malformed (stray or
--         mismatched end tag, tag without a name); otherwise the loop never
--         returns. A message is returned rather than raised because error()
--         is not among the globals localised before module() was called;
--         new().parse treats a returned value as a failure.
--
-- Known limitations: markup starting with "!" or "?" (comments, CDATA,
-- DOCTYPE, processing instructions) is discarded, and every tag is cut at the
-- first ">", including one inside a comment or an attribute value.
local function parser(data, handlers, ns_separator)
	-- Yield until the plain string `str` occurs in the buffer; return the
	-- position of its first occurrence.
	local function await(str)
		local pos = data:find(str, nil, true);
		while not pos do
			data = data..coroutine.yield();
			pos = data:find(str, nil, true);
		end
		return pos;
	end
	-- Consume and return the buffer up to and including the first character
	-- of `str` (callers pass a single character).
	local function read_until(str)
		local pos = await(str);
		local r = data:sub(1, pos);
		data = data:sub(pos+1);
		return r;
	end
	-- Consume and return the buffer up to, but not including, `str`.
	local function read_before(str)
		local pos = await(str);
		local r = data:sub(1, pos-1);
		data = data:sub(pos);
		return r;
	end
	-- Return the next character without consuming it, waiting for input if
	-- the buffer is empty.
	local function peek()
		while #data == 0 do data = coroutine.yield(); end
		return data:sub(1,1);
	end
	-- Namespace scopes form a chain: each element's scope table has its
	-- parent scope as metatable (with __index pointing at the parent), so
	-- prefix lookups fall through to enclosing elements. Slot [0] of a scope
	-- holds the element's reported name. The root scope has no metatable.
	local ns = { xml = "http://www.w3.org/XML/1998/namespace" };
	ns.__index = ns;
	-- Expand `name` to "<uri><separator><local>" using the current scope.
	-- `dodefault` applies the default namespace to unprefixed names; it is
	-- passed for element names only.
	local function apply_ns(name, dodefault)
		if not ns_separator then
			-- Without a separator the concatenations below would raise on
			-- the first known prefix (even the built-in "xml:").
			return name;
		end
		local prefix,n = name:match("^([^:]*):(.*)$");
		if prefix and ns[prefix] then
			return ns[prefix]..ns_separator..n;
		end
		if dodefault and ns[""] then
			return ns[""]..ns_separator..name;
		end
		return name;
	end
	-- Open a new scope for an element: record its xmlns declarations, then
	-- return the expanded tag name and a new attribute table that holds the
	-- expanded attribute names at 1..n and the values under those names.
	local function push(tag, attr)
		ns = setmetatable({}, ns);
		if ns_separator then
			-- Clearing existing keys while traversing with pairs() is allowed.
			for k,v in pairs(attr) do
				local xmlns = k == "xmlns" and "" or k:match("^xmlns:(.*)$");
				if xmlns then
					ns[xmlns] = v;
					attr[k] = nil;
				end
			end
		end
		local newattr, n = {}, 0;
		for k,v in pairs(attr) do
			n = n+1;
			k = apply_ns(k);
			newattr[n] = k;
			newattr[k] = v;
		end
		tag = apply_ns(tag, true);
		ns[0] = tag;
		ns.__index = ns;
		return tag, newattr;
	end
	-- Close the current scope and return the name it was opened with.
	local function pop()
		local tag = ns[0];
		ns = getmetatable(ns);
		return tag;
	end
	while true do
		if peek() == "<" then
			local elem = read_until(">"):sub(2,-2);
			local first = elem:sub(1,1);
			if first == "!" or first == "?" then
				-- neglect comments and processing-instructions
			elseif first == "/" then -- end tag
				local closing = elem:sub(2):match("^(.-)%s*$");
				if not getmetatable(ns) then
					-- Root scope: nothing is open. Popping here would set the
					-- scope chain to nil and report EndElement(nil).
					return "unexpected end tag </"..closing..">";
				end
				-- Resolve the name in the scope of the element being closed,
				-- exactly as push() did for the start tag.
				if apply_ns(closing, true) ~= ns[0] then
					return "mismatched end tag </"..closing..">";
				end
				handlers:EndElement(pop());
			else
				local empty = elem:sub(-1,-1) == "/"; -- <name ... />
				if empty then
					elem = elem:sub(1,-2);
				end
				local name,attr = parse_tag(elem);
				if not name then
					return "malformed tag <"..elem..">";
				end
				name,attr = push(name,attr);
				handlers:StartElement(name,attr);
				if empty then
					handlers:EndElement(pop());
				end
			end
		else
			-- Text is only delivered once the next "<" has arrived.
			local text = read_before("<");
			handlers:CharacterData(xml_unescape(text));
		end
	end
end
-- Create a streaming parser object.
-- @param handlers      callback object passed through to the parser coroutine
-- @param ns_separator  namespace separator passed through to the parser
--                      coroutine (may be nil)
-- @return a table with one method, parse(self, data):
--   * parse(chunk) feeds more text; returns true on success, or nil plus an
--     error value if the coroutine failed or finished with a value.
--   * parse() / parse(nil) signals end of input and returns true.
--   After end of input or a failure the live coroutine is replaced by
--   `deadroutine`, so any later parse(chunk) fails.
function new(handlers, ns_separator)
	local co = coroutine.create(parser);
	return {
		parse = function(self, data)
			if not data then
				co = deadroutine;
				return true; -- eof
			end
			-- On the first resume these become the parser's arguments; on
			-- later resumes the coroutine only uses the first value.
			local success, result = coroutine.resume(co, data, handlers, ns_separator);
			-- Check `success` too: a resume that failed with a nil/false
			-- error value must not be reported as success.
			if not success or result then
				co = deadroutine;
				return nil, result or "unknown parser error"; -- error
			end
			return true; -- success
		end;
	};
end
-- Return the module table; module("lxp") stores it in the environment as _M.
return _M;
|