aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Waqas Hussain <waqas20@gmail.com>, 2013-05-07 10:42:44 -0400
committer: Waqas Hussain <waqas20@gmail.com>, 2013-05-07 10:42:44 -0400
commit: d960ec8fb793d6d789f79535b3680c869e9bba54 (patch)
tree: e9a803d00e750f81e04dd175a3946bb05da846c7
parent: 11684a2d6c54123c73ceeab0d4a66b6dae6b5f09 (diff)
download: prosody-d960ec8fb793d6d789f79535b3680c869e9bba54.tar.gz
prosody-d960ec8fb793d6d789f79535b3680c869e9bba54.zip
util.json: New, faster, stricter, more compliant JSON decoder. Now returns nil,err instead of throwing errors on invalid input.
-rw-r--r-- util/json.lua 347
1 files changed, 155 insertions, 192 deletions
diff --git a/util/json.lua b/util/json.lua
index 0ce315b2..82ebcc43 100644
--- a/util/json.lua
+++ b/util/json.lua
@@ -185,214 +185,177 @@ end
-----------------------------------
-function json.decode(json)
- json = json.." "; -- appending a space ensures valid json wouldn't touch EOF
- local pos = 1;
- local current = {};
- local stack = {};
- local ch, peek;
- local function next()
- ch = json:sub(pos, pos);
- if ch == "" then error("Unexpected EOF"); end
- pos = pos+1;
- peek = json:sub(pos, pos);
- return ch;
- end
-
- local function skipwhitespace()
- while ch and (ch == "\r" or ch == "\n" or ch == "\t" or ch == " ") do
- next();
+local function _skip_whitespace(json, index) -- Returns the position of the first byte at or after `index` that is not space, tab, CR or LF.
+ return json:find("[^ \t\r\n]", index) or index; -- \r and \n are matched explicitly here (the [\r\n] -> \t conversion in json.decode is commented out); when find() matches nothing, `index` is returned unchanged
+end
+local function _fixobject(obj) -- Folds the special "__array" and "__hash" members of a decoded object back into `obj` itself and returns `obj`.
+ local __array = obj.__array;
+ if __array then
+ obj.__array = nil; -- the marker key is removed before its contents are merged in
+ for i,v in ipairs(__array) do
+ t_insert(obj, v); -- t_insert is defined outside this view (presumably table.insert -- confirm); appends v to obj
 end
 end
- local function skiplinecomment()
- repeat next(); until not(ch) or ch == "\r" or ch == "\n";
- skipwhitespace();
- end
- local function skipstarcomment()
- next(); next(); -- skip '/', '*'
- while peek and ch ~= "*" and peek ~= "/" do next(); end
- if not peek then error("eof in star comment") end
- next(); next(); -- skip '*', '/'
- skipwhitespace();
- end
- local function skipstuff()
- while true do
- skipwhitespace();
- if ch == "/" and peek == "*" then
- skipstarcomment();
- elseif ch == "/" and peek == "/" then
- skiplinecomment();
+ local __hash = obj.__hash;
+ if __hash then
+ obj.__hash = nil;
+ local k; -- pending key: nil while the next list element is expected to be a key
+ for i,v in ipairs(__hash) do -- __hash is read as a flat list: key1, value1, key2, value2, ...
+ if k ~= nil then
+ obj[k] = v; k = nil; -- second element of a pair: store it under the pending key
 else
- return;
+ k = v; -- first element of a pair: remember it as the key
 end
 end
 end
-
- local readvalue;
- local function readarray()
- local t = setmetatable({}, array_mt);
- next(); -- skip '['
- skipstuff();
- if ch == "]" then next(); return t; end
- t_insert(t, readvalue());
- while true do
- skipstuff();
- if ch == "]" then next(); return t; end
- if not ch then error("eof while reading array");
- elseif ch == "," then next();
- elseif ch then error("unexpected character in array, comma expected"); end
- if not ch then error("eof while reading array"); end
- t_insert(t, readvalue());
+ return obj;
+end
+local _readvalue, _readstring; -- forward declarations: both are assigned further down but are called by the readers defined before them
+local function _readobject(json, index) -- Parses an object whose "{" is at `index`. Returns table, next index on success; nil, error message on failure.
+ local o = {};
+ while true do
+ local key, val;
+ index = _skip_whitespace(json, index + 1); -- index + 1 steps over "{" on the first pass and over "," on later passes
+ if json:byte(index) ~= 0x22 then -- "\""
+ if json:byte(index) == 0x7d then return o, index + 1; end -- "}" -- NOTE(review): also reached right after a ",", so a trailing comma like {"a":1,} is accepted, and this path returns without calling _fixobject
+ return nil, "key expected";
 end
+ key, index = _readstring(json, index);
+ if key == nil then return nil, index; end -- on failure the second result of _readstring is the error message
+ index = _skip_whitespace(json, index);
+ if json:byte(index) ~= 0x3a then return nil, "colon expected"; end -- ":"
+ val, index = _readvalue(json, index + 1); -- index + 1 steps over ":"
+ if val == nil then return nil, index; end -- propagate the error; a decoded `false` is not nil and passes through
+ o[key] = val;
+ index = _skip_whitespace(json, index);
+ local b = json:byte(index);
+ if b == 0x7d then return _fixobject(o), index + 1; end -- "}"
+ if b ~= 0x2c then return nil, "object eof"; end -- "," -- any byte other than "}" or "," (including end of input) yields this message
 end
-
- local function checkandskip(c)
- local x = ch or "eof";
- if x ~= c then error("unexpected "..x..", '"..c.."' expected"); end
- next();
- end
- local function readliteral(lit, val)
- for c in lit:gmatch(".") do
- checkandskip(c);
+end
+local function _readarray(json, index) -- Parses an array whose "[" is at `index`. Returns table (with array_mt as metatable), next index on success; nil, error message on failure.
+ local a = {};
+ local oindex = index; -- position of the opening "[", kept for the empty-array check below
+ while true do
+ local val;
+ val, index = _readvalue(json, index + 1); -- index + 1 steps over "[" on the first pass and over "," on later passes
+ if val == nil then
+ if json:byte(oindex + 1) == 0x5d then return setmetatable(a, array_mt), oindex + 2; end -- "]" -- empty array, recognised only when "]" directly follows "["; NOTE(review): "[ ]" with inner whitespace falls through to the error return
+ return val, index; -- val is nil here and index holds the error from _readvalue
 end
- return val;
+ t_insert(a, val);
+ index = _skip_whitespace(json, index);
+ local b = json:byte(index);
+ if b == 0x5d then return setmetatable(a, array_mt), index + 1; end -- "]"
+ if b ~= 0x2c then return nil, "array eof"; end -- "," -- any byte other than "]" or "," (including end of input) yields this message
 end
- local function readstring()
- local s = {};
- checkandskip("\"");
- while ch do
- while ch and ch ~= "\\" and ch ~= "\"" do
- t_insert(s, ch); next();
- end
- if ch == "\\" then
- next();
- if unescapes[ch] then
- t_insert(s, unescapes[ch]);
- next();
- elseif ch == "u" then
- local seq = "";
- for i=1,4 do
- next();
- if not ch then error("unexpected eof in string"); end
- if not ch:match("[0-9a-fA-F]") then error("invalid unicode escape sequence in string"); end
- seq = seq..ch;
- end
- t_insert(s, codepoint_to_utf8(tonumber(seq, 16)));
- next();
- else error("invalid escape sequence in string"); end
- end
- if ch == "\"" then
- next();
- return t_concat(s);
- end
- end
- error("eof while reading string");
+end
+local _unescape_error; -- flag shared by _unescape_func (sets it) and _readstring (resets and checks it)
+local function _unescape_surrogate_func(x) -- Converts a "\uXXXX\uXXXX" surrogate-pair escape into a 4-byte sequence; its only call site in _readstring is currently commented out.
+ local lead, trail = tonumber(x:sub(3, 6), 16), tonumber(x:sub(9, 12), 16); -- hex digits of the first and second escape
+ local codepoint = lead * 0x400 + trail - 0x35FDC00; -- same as 0x10000 + (lead - 0xD800) * 0x400 + (trail - 0xDC00)
+ local a = codepoint % 64; -- lowest 6 bits
+ codepoint = (codepoint - a) / 64;
+ local b = codepoint % 64; -- next 6 bits
+ codepoint = (codepoint - b) / 64;
+ local c = codepoint % 64; -- next 6 bits
+ codepoint = (codepoint - c) / 64; -- what remains goes into the lead byte
+ return s_char(0xF0 + codepoint, 0x80 + c, 0x80 + b, 0x80 + a); -- s_char is defined outside this view (presumably string.char -- confirm)
+end
+local function _unescape_func(x) -- gsub callback used by _readstring: x is "\u" plus up to four following characters; returns the replacement text or flags an error.
+ x = x:match("%x%x%x%x", 3); -- four hex digits starting at position 3 (just after "\u"); nil when the escape is shorter or contains a non-hex character
+ if x then
+ --if x >= 0xD800 and x <= 0xDFFF then _unescape_error = true; end -- bad surrogate pair
+ return codepoint_to_utf8(tonumber(x, 16)); -- codepoint_to_utf8 is defined outside this view
 end
- local function readnumber()
- local s = "";
- if ch == "-" then
- s = s..ch; next();
- if not ch:match("[0-9]") then error("number format error"); end
- end
- if ch == "0" then
- s = s..ch; next();
- if ch:match("[0-9]") then error("number format error"); end
- else
- while ch and ch:match("[0-9]") do
- s = s..ch; next();
- end
- end
- if ch == "." then
- s = s..ch; next();
- if not ch:match("[0-9]") then error("number format error"); end
- while ch and ch:match("[0-9]") do
- s = s..ch; next();
- end
- if ch == "e" or ch == "E" then
- s = s..ch; next();
- if ch == "+" or ch == "-" then
- s = s..ch; next();
- if not ch:match("[0-9]") then error("number format error"); end
- while ch and ch:match("[0-9]") do
- s = s..ch; next();
- end
- end
- end
- end
- return tonumber(s);
+ _unescape_error = true; -- malformed escape: nothing is returned, so gsub keeps the original text and _readstring reports the error
+end
+function _readstring(json, index) -- Parses a string whose opening quote is at `index`. Returns string, next index on success; nil, error message on failure. Assigns the forward-declared local.
+ index = index + 1; -- step past the opening quote
+ local endindex = json:find("\"", index, true); -- plain search for the next quote; json.decode has already rewritten \" as \u0022, so no escaped quote is left in the text
+ if endindex then
+ local s = json:sub(index, endindex - 1);
+ --if s:find("[%z-\31]") then return nil, "control char in string"; end
+ -- FIXME handle control characters
+ _unescape_error = nil; -- reset the shared flag before running the gsub callback
+ --s = s:gsub("\\u[dD][89abAB]%x%x\\u[dD][cdefCDEF]%x%x", _unescape_surrogate_func);
+ -- FIXME handle escapes beyond BMP
+ s = s:gsub("\\u.?.?.?.?", _unescape_func); -- each match is "\u" plus up to four characters of any kind; _unescape_func validates them
+ if _unescape_error then return nil, "invalid escape"; end
+ return s, endindex + 1; -- endindex + 1 is the position just after the closing quote
 end
- local function readmember(t)
- skipstuff();
- local k = readstring();
- skipstuff();
- checkandskip(":");
- t[k] = readvalue();
+ return nil, "string eof"; -- no closing quote was found
+end
+local function _readnumber(json, index) -- Parses a number starting at `index` (_readvalue only calls this when that byte is a digit or "-"). Returns number, next index.
+ local m = json:match("[0-9%.%-eE%+]+", index); -- FIXME do strict checking -- takes the whole run of digit, ".", "-", "+", "e", "E" characters
+ return tonumber(m), index + #m; -- NOTE(review): for a malformed run such as "-" or "1.2.3" tonumber gives nil, so callers receive nil plus a numeric index instead of an error string
+end
+local function _readnull(json, index) -- Called by _readvalue when the byte at `index` is "n". Returns null, next index, or nil, error message.
+ local a, b, c = json:byte(index + 1, index + 3); -- the three bytes after "n"
+ if a == 0x75 and b == 0x6c and c == 0x6c then -- "u", "l", "l"
+ return null, index + 4; -- `null` is defined outside this view (presumably the module's JSON-null sentinel -- confirm)
 end
- local function fixobject(obj)
- local __array = obj.__array;
- if __array then
- obj.__array = nil;
- for i,v in ipairs(__array) do
- t_insert(obj, v);
- end
- end
- local __hash = obj.__hash;
- if __hash then
- obj.__hash = nil;
- local k;
- for i,v in ipairs(__hash) do
- if k ~= nil then
- obj[k] = v; k = nil;
- else
- k = v;
- end
- end
- end
- return obj;
+ return nil, "null parse failed";
+end
+local function _readtrue(json, index) -- Called by _readvalue when the byte at `index` is "t". Returns true, next index, or nil, error message.
+ local a, b, c = json:byte(index + 1, index + 3); -- the three bytes after "t"
+ if a == 0x72 and b == 0x75 and c == 0x65 then -- "r", "u", "e"
+ return true, index + 4;
 end
- local function readobject()
- local t = {};
- next(); -- skip '{'
- skipstuff();
- if ch == "}" then next(); return t; end
- if not ch then error("eof while reading object"); end
- readmember(t);
- while true do
- skipstuff();
- if ch == "}" then next(); return fixobject(t); end
- if not ch then error("eof while reading object");
- elseif ch == "," then next();
- elseif ch then error("unexpected character in object, comma expected"); end
- if not ch then error("eof while reading object"); end
- readmember(t);
- end
+ return nil, "true parse failed";
+end
+local function _readfalse(json, index) -- Called by _readvalue when the byte at `index` is "f". Returns false, next index, or nil, error message.
+ local a, b, c, d = json:byte(index + 1, index + 4); -- the four bytes after "f"
+ if a == 0x61 and b == 0x6c and c == 0x73 and d == 0x65 then -- "a", "l", "s", "e"
+ return false, index + 5; -- callers compare the value with nil, so this false is not mistaken for a failure
 end
-
- function readvalue()
- skipstuff();
- while ch do
- if ch == "{" then
- return readobject();
- elseif ch == "[" then
- return readarray();
- elseif ch == "\"" then
- return readstring();
- elseif ch:match("[%-0-9%.]") then
- return readnumber();
- elseif ch == "n" then
- return readliteral("null", null);
- elseif ch == "t" then
- return readliteral("true", true);
- elseif ch == "f" then
- return readliteral("false", false);
- else
- error("invalid character at value start: "..ch);
- end
- end
- error("eof while reading value");
+ return nil, "false parse failed";
+end
+function _readvalue(json, index) -- Reads one JSON value starting at or after `index`. Returns value, next index on success; nil, error message on failure. Assigns the forward-declared local.
+ index = _skip_whitespace(json, index);
+ local b = json:byte(index); -- nil when index is past the end of the text
+ -- TODO try table lookup instead of if-else?
+ if b == 0x7B then -- "{"
+ return _readobject(json, index);
+ elseif b == 0x5B then -- "["
+ return _readarray(json, index);
+ elseif b == 0x22 then -- "\""
+ return _readstring(json, index);
+ elseif b ~= nil and b >= 0x30 and b <= 0x39 or b == 0x2d then -- "0"-"9" or "-"
+ return _readnumber(json, index);
+ elseif b == 0x6e then -- "n"
+ return _readnull(json, index);
+ elseif b == 0x74 then -- "t"
+ return _readtrue(json, index);
+ elseif b == 0x66 then -- "f"
+ return _readfalse(json, index);
+ else
+ return nil, "value expected"; -- any other byte, or end of input
 end
- next();
- return readvalue();
+end
+local first_escape = { -- Lookup table for json.decode's first gsub pass: maps each two-character backslash escape to its \uXXXX form.
+ ["\\\""] = "\\u0022";
+ ["\\\\"] = "\\u005c";
+ ["\\/" ] = "\\u002f";
+ ["\\b" ] = "\\u0008";
+ ["\\f" ] = "\\u000C";
+ ["\\n" ] = "\\u000A";
+ ["\\r" ] = "\\u000D";
+ ["\\t" ] = "\\u0009";
+ ["\\u" ] = "\\u"; -- "\u" maps to itself so existing \uXXXX escapes are left as they are
+};
+
+function json.decode(json) -- Decodes JSON text. Returns the value on success; nil, error message on failure. The parameter `json` shadows the module table inside this function.
+ json = json:gsub("\\.", first_escape) -- get rid of all escapes except \uXXXX, making string parsing much simpler
+ --:gsub("[\r\n]", "\t"); -- \r\n\t are equivalent, we care about none of them, and none of them can be in strings
+
+ -- TODO do encoding verification
+
+ local val, index = _readvalue(json, 1);
+ if val == nil then return val, index; end -- on failure `index` carries the error from _readvalue
+ if json:find("[^ \t\r\n]", index) then return nil, "garbage at eof"; end -- only whitespace may follow the top-level value
+
+ return val;
end
function json.test(object)