From b21d24e5cb146153b527b44da8a5bd7fae3c2ada Mon Sep 17 00:00:00 2001 From: Kim Alvefur Date: Mon, 23 Mar 2015 14:26:34 +0100 Subject: util.encodings: Perform validation of UTF-8 strings before passing to libidn (Based on code from the utf8 library in Lua 5.3) --- util-src/encodings.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 67 insertions(+), 3 deletions(-) (limited to 'util-src/encodings.c') diff --git a/util-src/encodings.c b/util-src/encodings.c index b9b6160a..898add1a 100644 --- a/util-src/encodings.c +++ b/util-src/encodings.c @@ -1,6 +1,7 @@ /* Prosody IM -- Copyright (C) 2008-2010 Matthew Wild -- Copyright (C) 2008-2010 Waqas Hussain +-- Copyright (C) 1994-2015 Lua.org, PUC-Rio. -- -- This project is MIT/X11 licensed. Please see the -- COPYING file in the source package for more information. @@ -116,6 +117,65 @@ static const luaL_Reg Reg_base64[] = { NULL, NULL } }; +/******************* UTF-8 ********************/ + +/* + * Adapted from Lua 5.3 + * Needed because libidn does not validate that input is valid UTF-8 + */ + +#define MAXUNICODE 0x10FFFF + +/* + * Decode one UTF-8 sequence, returning NULL if byte sequence is invalid. + */ +static const char *utf8_decode (const char *o, int *val) { + static unsigned int limits[] = {0xFF, 0x7F, 0x7FF, 0xFFFF}; + const unsigned char *s = (const unsigned char *)o; + unsigned int c = s[0]; + unsigned int res = 0; /* final result */ + if (c < 0x80) /* ascii? */ + res = c; + else { + int count = 0; /* to count number of continuation bytes */ + while (c & 0x40) { /* still have continuation bytes? */ + int cc = s[++count]; /* read next byte */ + if ((cc & 0xC0) != 0x80) /* not a continuation byte? */ + return NULL; /* invalid byte sequence */ + res = (res << 6) | (cc & 0x3F); /* add lower 6 bits from cont. byte */ + c <<= 1; /* to test next bit */ + } + res |= ((c & 0x7F) << (count * 5)); /* add first byte */ + if (count > 3 || res > MAXUNICODE || res <= limits[count] || (0xd800 <= res && res <= 0xdfff) ) + return NULL; /* invalid byte sequence */ + s += count; /* skip continuation bytes read */ + } + if (val) *val = res; + return (const char *)s + 1; /* +1 to include first byte */ +} + +/* + * Check that a string is valid UTF-8 + * Returns NULL if not + */ +const char* check_utf8 (lua_State *L, int idx, size_t *l) { + size_t pos, len; + const char *s = luaL_checklstring(L, 1, &len); + pos = 0; + while (pos <= len) { + const char *s1 = utf8_decode(s + pos, NULL); + if (s1 == NULL) { /* conversion error? */ + return NULL; + } + pos = s1 - s; + } + if(l != NULL) { + *l = len; + } + return s; +} + + /***************** STRINGPREP *****************/ #ifdef USE_STRINGPREP_ICU @@ -212,8 +272,8 @@ static int stringprep_prep(lua_State *L, const Stringprep_profile *profile) lua_pushnil(L); return 1; } - s = lua_tolstring(L, 1, &len); - if (len >= 1024) { + s = check_utf8(L, 1, &len); + if (s == NULL || len >= 1024 || len != strlen(s)) { lua_pushnil(L); return 1; /* TODO return error message */ } @@ -320,7 +380,11 @@ static int Lidna_to_unicode(lua_State *L) /** idna.to_unicode(s) */ static int Lidna_to_ascii(lua_State *L) /** idna.to_ascii(s) */ { size_t len; - const char *s = luaL_checklstring(L, 1, &len); + const char *s = check_utf8(L, 1, &len); + if (s == NULL || len != strlen(s)) { + lua_pushnil(L); + return 1; /* TODO return error message */ + } char* output = NULL; int ret = idna_to_ascii_8z(s, &output, IDNA_USE_STD3_ASCII_RULES); if (ret == IDNA_SUCCESS) { -- cgit v1.2.3 From 7c8cc6aa134e6b58bc1cf0f4bfd143a405e12d67 Mon Sep 17 00:00:00 2001 From: Kim Alvefur Date: Mon, 23 Mar 2015 14:27:30 +0100 Subject: util.encodings: Expose UTF-8 validation and length checking functions --- util-src/encodings.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'util-src/encodings.c') diff --git a/util-src/encodings.c b/util-src/encodings.c index 898add1a..91826ca4 100644 --- a/util-src/encodings.c +++ b/util-src/encodings.c @@ -175,6 +175,29 @@ const char* check_utf8 (lua_State *L, int idx, size_t *l) { return s; } +static int Lutf8_valid(lua_State *L) { + lua_pushboolean(L, check_utf8(L, 1, NULL) != NULL); + return 1; +} + +static int Lutf8_length(lua_State *L) { + size_t len; + if(!check_utf8(L, 1, &len)) { + lua_pushnil(L); + lua_pushliteral(L, "invalid utf8"); + return 2; + } + lua_pushinteger(L, len); + return 1; +} + +static const luaL_Reg Reg_utf8[] = +{ + { "valid", Lutf8_valid }, + { "length", Lutf8_length }, + { NULL, NULL } +}; + /***************** STRINGPREP *****************/ #ifdef USE_STRINGPREP_ICU @@ -452,6 +475,11 @@ LUALIB_API int luaopen_util_encodings(lua_State *L) luaL_register(L, NULL, Reg_idna); lua_settable(L,-3); + lua_pushliteral(L, "utf8"); + lua_newtable(L); + luaL_register(L, NULL, Reg_utf8); + lua_settable(L, -3); + lua_pushliteral(L, "version"); /** version */ lua_pushliteral(L, "-3.14"); lua_settable(L,-3); -- cgit v1.2.3