1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
|
local st = require "util.stanza";
local lxp = require "lxp";
local t_insert = table.insert;
local t_remove = table.remove;
local error = error;
local _ENV = nil;
-- luacheck: std none
-- The parser is built inside an immediately-invoked closure so that the
-- namespace lookup table and the name pattern are created once and shared by
-- every call, without being visible to the rest of the module.
local parse_xml = (function()
	-- Namespace URIs whose attributes get rewritten to a "prefix:name" key.
	local ns_prefixes = {
		["http://www.w3.org/XML/1998/namespace"] = "xml";
	};
	-- Separator passed to lxp.new(); the handlers below split the names they
	-- receive on this character to recover (namespace, local name).
	local ns_separator = "\1";
	-- Two captures: everything before the separator, and everything after it.
	-- When a name contains no separator the whole name ends up in the first
	-- capture and the second capture is the empty string.
	local ns_pattern = "^([^"..ns_separator.."]*)"..ns_separator.."?(.*)$";

	--- Parse an XML document held in a string.
	-- @param xml document text, handed to the lxp parser in a single call
	-- @param options optional table; a truthy `allow_comments` or
	--   `allow_processing_instructions` field stops the corresponding
	--   construct from aborting the parse
	-- @return on success: the first child tag of the internal "root" stanza
	-- @return on failure: the falsy parser result, then a message of the
	--   form "<error> (line <n>, col <n>)"
	return function(xml, options)
		--luacheck: ignore 212/self
		local handler = {};
		-- Synthetic container; the document element is expected to become its
		-- first child (see the success return at the bottom).
		local stanza = st.stanza("root");
		-- Parallel stacks of the prefixed namespace declarations currently in scope.
		local namespaces = {};
		local prefixes = {};

		-- Push a prefixed namespace declaration. Declarations without a prefix
		-- (prefix == nil) are not tracked here.
		function handler:StartNamespaceDecl(prefix, url)
			if prefix ~= nil then
				t_insert(namespaces, url);
				t_insert(prefixes, prefix);
			end
		end

		-- Pop the most recent prefixed namespace declaration.
		function handler:EndNamespaceDecl(prefix)
			if prefix ~= nil then
				-- we depend on each StartNamespaceDecl having a paired EndNamespaceDecl
				t_remove(namespaces);
				t_remove(prefixes);
			end
		end

		-- Open a new element on the stanza being built.
		function handler:StartElement(tagname, attr)
			local curr_ns,name = tagname:match(ns_pattern);
			if name == "" then
				-- No separator in the tag name: the first capture is the local
				-- name and the element has no namespace.
				curr_ns, name = "", curr_ns;
			end
			if curr_ns ~= "" then
				attr.xmlns = curr_ns;
			end
			-- The array part of attr lists the attribute names; the values are
			-- stored under those names. Clear the array part and rename any
			-- attribute whose namespace has a known prefix to "prefix:name".
			for i=1,#attr do
				local k = attr[i];
				attr[i] = nil;
				local ns, nm = k:match(ns_pattern);
				if nm ~= "" then
					ns = ns_prefixes[ns];
					if ns then
						attr[ns..":"..nm] = attr[k];
						attr[k] = nil;
					end
				end
			end
			-- Snapshot of the prefix -> URI declarations in scope for this element.
			local n = {}
			for i=1,#namespaces do
				n[prefixes[i]] = namespaces[i];
			end
			stanza:tag(name, attr, n);
		end

		-- Append text content to the current element.
		function handler:CharacterData(data)
			stanza:text(data);
		end

		-- Close the current element.
		function handler:EndElement()
			stanza:up();
		end

		-- SECURITY: These handlers, especially the Doctype one, are required to prevent exploits such as Billion Laughs.
		-- Aborts the parse; raises if the parser cannot be stopped.
		local function restricted_handler(parser)
			if not parser.stop or not parser:stop() then
				error("Failed to abort parsing");
			end
		end
		handler.StartDoctypeDecl = restricted_handler;
		if not options or not options.allow_comments then
			-- NOTE: comments are generally harmless and can be useful when parsing configuration files or other data, even user-provided data
			handler.Comment = restricted_handler;
		end
		if not options or not options.allow_processing_instructions then
			-- Processing instructions should generally be safe to just ignore
			handler.ProcessingInstruction = restricted_handler;
		end

		local parser = lxp.new(handler, ns_separator);
		local ok, err, line, col = parser:parse(xml);
		-- A second call with no argument signals the end of the document.
		if ok then ok, err, line, col = parser:parse(); end
		--parser:close();
		if ok then
			return stanza.tags[1];
		else
			-- Fixed: the format string previously ended in "))", leaving a stray
			-- closing parenthesis in every error message.
			return ok, ("%s (line %d, col %d)"):format(err, line, col);
		end
	end;
end)();
-- Public interface of this module: `parse` is the parse_xml function.
return { parse = parse_xml };
|