From d64f65005c8f931e44aa8f37a442051983caf4b2 Mon Sep 17 00:00:00 2001 From: Matthew Wild Date: Sat, 6 Apr 2013 12:20:31 +0100 Subject: util.json: Convert \uXXXX to UTF-8 when decoding --- util/json.lua | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) (limited to 'util') diff --git a/util/json.lua b/util/json.lua index abb87eab..ff7351a7 100644 --- a/util/json.lua +++ b/util/json.lua @@ -1,3 +1,12 @@ +-- Prosody IM +-- Copyright (C) 2008-2010 Matthew Wild +-- Copyright (C) 2008-2010 Waqas Hussain +-- +-- utf8char copyright (C) 2007 Rici Lake +-- +-- This project is MIT/X11 licensed. Please see the +-- COPYING file in the source package for more information. +-- local type = type; local t_insert, t_concat, t_remove, t_sort = table.insert, table.concat, table.remove, table.sort; @@ -29,6 +38,32 @@ for i=0,31 do if not escapes[ch] then escapes[ch] = ("\\u%.4X"):format(i); end end +local function utf8char(i) + if i >= 0 then + i = i - i%1 + if i < 128 then + return s_char(i) + else + local c1 = i % 64 + i = (i - c1) / 64 + if i < 32 then + return s_char(0xC0+i, 0x80+c1) + else + local c2 = i % 64 + i = (i - c2) / 64 + if i < 16 and (i ~= 13 or c2 < 32) then + return s_char(0xE0+i, 0x80+c2, 0x80+c1) + elseif i >= 16 and i < 0x110 then + local c3 = i % 64 + i = (i - c3) / 64 + return s_char(0xF0+i, 0x80+c3, 0x80+c2, 0x80+c1) + end + end + end + end +end + + local valid_types = { number = true, string = true, @@ -249,7 +284,7 @@ function json.decode(json) if not ch:match("[0-9a-fA-F]") then error("invalid unicode escape sequence in string"); end seq = seq..ch; end - s = s..s.char(tonumber(seq, 16)); -- FIXME do proper utf-8 + s = s..utf8char(tonumber(seq, 16)); next(); else error("invalid escape sequence in string"); end end -- cgit v1.2.3