-- util/json.lua -- JSON decoder section, as introduced by commit
-- 9ca6d3a04366361775da07d4c738fc0ca3bac432
--   Author:  Waqas Hussain
--   Date:    Tue, 7 May 2013 10:42:44 -0400
--   Subject: util.json: New, faster, stricter, more compliant JSON decoder.
--            Now returns nil,err instead of throwing errors on invalid input.
--
-- The pre-patch decoder (the removed side of the diff, which reported problems
-- via error()) is superseded by the implementation below.
--
-- NOTE(review): the code below uses names that are not defined in this section:
-- json, t_insert, s_char, array_mt, null and codepoint_to_utf8. They are
-- presumably module-level locals declared earlier in util/json.lua -- confirm.
--
-- Convention shared by every _read* helper: it is called as (json, index) with
-- `index` pointing at the first byte of the token, and returns
--   value, next_index   on success (next_index is one past the token), or
--   nil, error_message  on failure.

-----------------------------------

-- Returns the position of the first non-whitespace byte at or after `index`.
-- When only whitespace (or nothing at all) remains, `index` is returned as-is.
local function _skip_whitespace(json, index)
	return json:find("[^ \t\r\n]", index) or index;
end

-- Post-processes a decoded object that carries the special "__array" and/or
-- "__hash" members:
--   * obj.__array: removed from obj, and its elements appended to obj.
--   * obj.__hash:  removed from obj, and its elements consumed pairwise as
--                  key, value, key, value, ... and stored into obj.
-- Returns the same (mutated) table.
local function _fixobject(obj)
	local __array = obj.__array;
	if __array then
		obj.__array = nil;
		for _, v in ipairs(__array) do
			t_insert(obj, v);
		end
	end
	local __hash = obj.__hash;
	if __hash then
		obj.__hash = nil;
		local k;
		for _, v in ipairs(__hash) do
			if k ~= nil then
				obj[k] = v; k = nil;
			else
				k = v;
			end
		end
	end
	return obj;
end

local _readvalue, _readstring; -- forward declarations (mutual recursion)

-- Reads an object; `index` points at the opening "{".
-- A "}" is only accepted directly after "{" (empty object) or after a member,
-- so a trailing comma such as {"a":1,} is rejected with "key expected".
local function _readobject(json, index)
	local o = {};
	index = _skip_whitespace(json, index + 1);
	if json:byte(index) == 0x7d then return o, index + 1; end -- "}": empty object
	while true do
		local key, val;
		if json:byte(index) ~= 0x22 then return nil, "key expected"; end -- "\""
		key, index = _readstring(json, index);
		if key == nil then return nil, index; end -- index holds the error message here
		index = _skip_whitespace(json, index);
		if json:byte(index) ~= 0x3a then return nil, "colon expected"; end -- ":"
		val, index = _readvalue(json, index + 1);
		if val == nil then return nil, index; end
		o[key] = val;
		index = _skip_whitespace(json, index);
		local b = json:byte(index);
		if b == 0x7d then return _fixobject(o), index + 1; end -- "}"
		if b ~= 0x2c then return nil, "object eof"; end -- ","
		index = _skip_whitespace(json, index + 1); -- step over "," to the next key
	end
end

-- Reads an array; `index` points at the opening "[".
-- The result table gets array_mt as its metatable.
-- Whitespace between "[" and "]" is allowed, so "[ ]" decodes to an empty array.
local function _readarray(json, index)
	local a = {};
	index = _skip_whitespace(json, index + 1);
	if json:byte(index) == 0x5d then return setmetatable(a, array_mt), index + 1; end -- "]": empty array
	while true do
		local val;
		val, index = _readvalue(json, index);
		if val == nil then return nil, index; end -- index holds the error message here
		t_insert(a, val);
		index = _skip_whitespace(json, index);
		local b = json:byte(index);
		if b == 0x5d then return setmetatable(a, array_mt), index + 1; end -- "]"
		if b ~= 0x2c then return nil, "array eof"; end -- ","
		index = index + 1; -- step over ","; _readvalue skips leading whitespace itself
	end
end

-- Set by _unescape_func when it meets a malformed \u escape; reset and checked
-- by _readstring around its gsub call.
local _unescape_error;

-- Converts a 12-character "\uXXXX\uXXXX" surrogate pair into a 4-byte UTF-8
-- sequence. Currently unused: the gsub call that would invoke it is commented
-- out in _readstring.
local function _unescape_surrogate_func(x)
	local lead, trail = tonumber(x:sub(3, 6), 16), tonumber(x:sub(9, 12), 16);
	-- (lead - 0xD800) * 0x400 + (trail - 0xDC00) + 0x10000, with constants folded
	local codepoint = lead * 0x400 + trail - 0x35FDC00;
	local a = codepoint % 64;
	codepoint = (codepoint - a) / 64;
	local b = codepoint % 64;
	codepoint = (codepoint - b) / 64;
	local c = codepoint % 64;
	codepoint = (codepoint - c) / 64;
	return s_char(0xF0 + codepoint, 0x80 + c, 0x80 + b, 0x80 + a);
end

-- gsub callback for "\u" followed by up to four characters.
-- Returns the replacement produced by codepoint_to_utf8 when four hex digits
-- follow "\u"; otherwise flags an error and returns nothing (gsub then keeps
-- the original text, and _readstring reports the failure).
local function _unescape_func(x)
	x = x:match("%x%x%x%x", 3);
	if x then
		--if x >= 0xD800 and x <= 0xDFFF then _unescape_error = true; end -- bad surrogate pair
		return codepoint_to_utf8(tonumber(x, 16));
	end
	_unescape_error = true;
end

-- Reads a string; `index` points at the opening quote.
-- Relies on json.decode having already rewritten every two-character escape
-- into \uXXXX form: the first '"' found is therefore the terminator, and any
-- backslash not followed by "u" is an escape that first_escape did not
-- recognise.
function _readstring(json, index)
	index = index + 1;
	local endindex = json:find("\"", index, true);
	if not endindex then return nil, "string eof"; end
	local s = json:sub(index, endindex - 1);
	--if s:find("[%z-\31]") then return nil, "control char in string"; end
	-- FIXME handle control characters
	if s:find("\\[^u]") then return nil, "invalid escape"; end -- e.g. "\x"
	_unescape_error = nil;
	--s = s:gsub("\\u[dD][89abAB]%x%x\\u[dD][cdefCDEF]%x%x", _unescape_surrogate_func);
	-- FIXME handle escapes beyond BMP
	s = s:gsub("\\u.?.?.?.?", _unescape_func);
	if _unescape_error then return nil, "invalid escape"; end
	return s, endindex + 1;
end

-- Reads a number; `index` points at its first byte (a digit or "-").
-- Collects the run of number-ish characters and lets tonumber() judge it.
-- Returns nil, "invalid number" when tonumber() rejects the run (e.g. "1e").
local function _readnumber(json, index)
	local m = json:match("^[0-9%.%-eE%+]+", index); -- FIXME do strict checking
	local n = m and tonumber(m);
	if n == nil then return nil, "invalid number"; end
	return n, index + #m;
end

-- Reads the literal null; `index` points at "n". Returns the `null` value.
local function _readnull(json, index)
	local a, b, c = json:byte(index + 1, index + 3);
	if a == 0x75 and b == 0x6c and c == 0x6c then -- "ull"
		return null, index + 4;
	end
	return nil, "null parse failed";
end

-- Reads the literal true; `index` points at "t".
local function _readtrue(json, index)
	local a, b, c = json:byte(index + 1, index + 3);
	if a == 0x72 and b == 0x75 and c == 0x65 then -- "rue"
		return true, index + 4;
	end
	return nil, "true parse failed";
end

-- Reads the literal false; `index` points at "f".
local function _readfalse(json, index)
	local a, b, c, d = json:byte(index + 1, index + 4);
	if a == 0x61 and b == 0x6c and c == 0x73 and d == 0x65 then -- "alse"
		return false, index + 5;
	end
	return nil, "false parse failed";
end

-- Skips leading whitespace and dispatches on the first byte of the value.
function _readvalue(json, index)
	index = _skip_whitespace(json, index);
	local b = json:byte(index);
	-- TODO try table lookup instead of if-else?
	if b == 0x7B then -- "{"
		return _readobject(json, index);
	elseif b == 0x5B then -- "["
		return _readarray(json, index);
	elseif b == 0x22 then -- "\""
		return _readstring(json, index);
	elseif b ~= nil and b >= 0x30 and b <= 0x39 or b == 0x2d then -- "0"-"9" or "-"
		return _readnumber(json, index);
	elseif b == 0x6e then -- "n"
		return _readnull(json, index);
	elseif b == 0x74 then -- "t"
		return _readtrue(json, index);
	elseif b == 0x66 then -- "f"
		return _readfalse(json, index);
	else
		return nil, "value expected";
	end
end

-- Maps each two-character JSON escape to its \uXXXX equivalent. "\u" maps to
-- itself so that existing \uXXXX escapes pass through untouched. Escapes not
-- listed here are left as they are by gsub and rejected later by _readstring.
local first_escape = {
	["\\\""] = "\\u0022";
	["\\\\"] = "\\u005c";
	["\\/" ] = "\\u002f";
	["\\b" ] = "\\u0008";
	["\\f" ] = "\\u000C";
	["\\n" ] = "\\u000A";
	["\\r" ] = "\\u000D";
	["\\t" ] = "\\u0009";
	["\\u" ] = "\\u";
};

-- Decodes a JSON document.
-- @param json  the JSON text (a string)
-- @return the decoded value on success (note that it may legitimately be
--         false), or nil plus an error message when the input is not valid.
function json.decode(json)
	-- get rid of all escapes except \uXXXX, making string parsing much simpler
	json = json:gsub("\\.", first_escape);
	--:gsub("[\r\n]", "\t"); -- \r\n\t are equivalent, we care about none of them, and none of them can be in strings

	-- TODO do encoding verification

	local val, index = _readvalue(json, 1);
	if val == nil then return val, index; end -- index holds the error message here
	if json:find("[^ \t\r\n]", index) then return nil, "garbage at eof"; end

	return val;
end

-- (The patch context continues with json.test(object), whose body lies outside this section.)