From 43aed81cb51776952ce8c6d211369bc49a05aa67 Mon Sep 17 00:00:00 2001 From: Kim Alvefur Date: Wed, 24 Apr 2019 22:40:38 +0200 Subject: util.encodings: Add binding to confusables skeleton function in ICU --- util-src/encodings.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'util-src/encodings.c') diff --git a/util-src/encodings.c b/util-src/encodings.c index e55a3f44..0d723913 100644 --- a/util-src/encodings.c +++ b/util-src/encodings.c @@ -268,6 +268,7 @@ static const luaL_Reg Reg_utf8[] = { #include <unicode/usprep.h> #include <unicode/ustring.h> #include <unicode/utrace.h> +#include <unicode/uspoof.h> static int icu_stringprep_prep(lua_State *L, const UStringPrepProfile *profile) { size_t input_len; @@ -321,6 +322,7 @@ UStringPrepProfile *icu_nameprep; UStringPrepProfile *icu_nodeprep; UStringPrepProfile *icu_resourceprep; UStringPrepProfile *icu_saslprep; +USpoofChecker *icu_spoofcheck; /* initialize global ICU stringprep profiles */ void init_icu() { @@ -330,6 +332,8 @@ void init_icu() { icu_nodeprep = usprep_openByType(USPREP_RFC3920_NODEPREP, &err); icu_resourceprep = usprep_openByType(USPREP_RFC3920_RESOURCEPREP, &err); icu_saslprep = usprep_openByType(USPREP_RFC4013_SASLPREP, &err); + icu_spoofcheck = uspoof_open(&err); + uspoof_setChecks(icu_spoofcheck, USPOOF_CONFUSABLE, &err); if(U_FAILURE(err)) { fprintf(stderr, "[c] util.encodings: error: %s\n", u_errorName((UErrorCode)err)); @@ -477,6 +481,40 @@ static int Lidna_to_unicode(lua_State *L) { /** idna.to_unicode(s) */ } } +static int Lskeleton(lua_State *L) { + size_t len; + int32_t ulen, dest_len, output_len; + const char *s = luaL_checklstring(L, 1, &len); + UErrorCode err = U_ZERO_ERROR; + UChar ustr[1024]; + UChar dest[1024]; + char output[1024]; + + u_strFromUTF8(ustr, 1024, &ulen, s, len, &err); + + if(U_FAILURE(err)) { + lua_pushnil(L); + return 1; + } + + dest_len = uspoof_getSkeleton(icu_spoofcheck, 0, ustr, ulen, dest, 1024, &err); + + if(U_FAILURE(err)) { + lua_pushnil(L); + return 1; + } + + u_strToUTF8(output, 1024, 
&output_len, dest, dest_len, &err); + + if(U_SUCCESS(err)) { + lua_pushlstring(L, output, output_len); + return 1; + } + + lua_pushnil(L); + return 1; +} + #else /* USE_STRINGPREP_ICU */ /****************** libidn ********************/ @@ -558,6 +596,13 @@ LUALIB_API int luaopen_util_encodings(lua_State *L) { luaL_setfuncs(L, Reg_utf8, 0); lua_setfield(L, -2, "utf8"); +#ifdef USE_STRINGPREP_ICU + lua_newtable(L); + lua_pushcfunction(L, Lskeleton); + lua_setfield(L, -2, "skeleton"); + lua_setfield(L, -2, "confusable"); +#endif + lua_pushliteral(L, "-3.14"); lua_setfield(L, -2, "version"); return 1; -- cgit v1.2.3 From 183b42baa0da20b06ff3429bcc75a8ce01676a1b Mon Sep 17 00:00:00 2001 From: Kim Alvefur Date: Mon, 29 Apr 2019 15:53:52 +0200 Subject: util.encodings: Add compat with ICU before version 58 --- util-src/encodings.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'util-src/encodings.c') diff --git a/util-src/encodings.c b/util-src/encodings.c index 0d723913..3b7f322d 100644 --- a/util-src/encodings.c +++ b/util-src/encodings.c @@ -324,6 +324,11 @@ UStringPrepProfile *icu_resourceprep; UStringPrepProfile *icu_saslprep; USpoofChecker *icu_spoofcheck; +#if (U_ICU_VERSION_MAJOR_NUM < 58) +/* COMPAT */ +#define USPOOF_CONFUSABLE (USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_WHOLE_SCRIPT_CONFUSABLE) +#endif + /* initialize global ICU stringprep profiles */ void init_icu() { UErrorCode err = U_ZERO_ERROR; -- cgit v1.2.3 From 63c03ce6ef0cdb5aa5640e1e71069ec8a1396247 Mon Sep 17 00:00:00 2001 From: Kim Alvefur Date: Mon, 13 May 2019 11:30:45 +0200 Subject: util.encodings: Declare absence of arguments [-Wstrict-prototypes] --- util-src/encodings.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'util-src/encodings.c') diff --git a/util-src/encodings.c b/util-src/encodings.c index 3b7f322d..5e7032cf 100644 --- a/util-src/encodings.c +++ b/util-src/encodings.c @@ -330,7 +330,7 @@ USpoofChecker *icu_spoofcheck; #endif /* 
initialize global ICU stringprep profiles */ -void init_icu() { +void init_icu(void) { UErrorCode err = U_ZERO_ERROR; utrace_setLevel(UTRACE_VERBOSE); icu_nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, &err); -- cgit v1.2.3 From bb4cb60fb8200e5fa26eaa482422fbbfa71aa11c Mon Sep 17 00:00:00 2001 From: Kim Alvefur Date: Wed, 11 Sep 2019 00:14:59 +0200 Subject: util.encodings: Switch ICU binding to IDNA2008 (fixes #533, #1301) --- util-src/encodings.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'util-src/encodings.c') diff --git a/util-src/encodings.c b/util-src/encodings.c index 5e7032cf..f20ba75d 100644 --- a/util-src/encodings.c +++ b/util-src/encodings.c @@ -269,6 +269,7 @@ static const luaL_Reg Reg_utf8[] = { #include <unicode/ustring.h> #include <unicode/utrace.h> #include <unicode/uspoof.h> +#include <unicode/uidna.h> static int icu_stringprep_prep(lua_State *L, const UStringPrepProfile *profile) { size_t input_len; @@ -323,6 +324,7 @@ UStringPrepProfile *icu_nodeprep; UStringPrepProfile *icu_resourceprep; UStringPrepProfile *icu_saslprep; USpoofChecker *icu_spoofcheck; +UIDNA *icu_idna2008; #if (U_ICU_VERSION_MAJOR_NUM < 58) /* COMPAT */ @@ -339,6 +341,7 @@ void init_icu(void) { icu_saslprep = usprep_openByType(USPREP_RFC4013_SASLPREP, &err); icu_spoofcheck = uspoof_open(&err); uspoof_setChecks(icu_spoofcheck, USPOOF_CONFUSABLE, &err); + icu_idna2008 = uidna_openUTS46(UIDNA_USE_STD3_RULES, &err); if(U_FAILURE(err)) { fprintf(stderr, "[c] util.encodings: error: %s\n", u_errorName((UErrorCode)err)); @@ -434,9 +437,10 @@ static int Lidna_to_ascii(lua_State *L) { /** idna.to_ascii(s) */ return 1; } - dest_len = uidna_IDNToASCII(ustr, ulen, dest, 1024, UIDNA_USE_STD3_RULES, NULL, &err); + UIDNAInfo info = UIDNA_INFO_INITIALIZER; + dest_len = uidna_nameToASCII(icu_idna2008, ustr, ulen, dest, 256, &info, &err); - if(U_FAILURE(err)) { + if(U_FAILURE(err) || info.errors) { lua_pushnil(L); return 1; } else { @@ -468,9 +472,10 @@ static int Lidna_to_unicode(lua_State *L) { /** idna.to_unicode(s) */ return 
1; } - dest_len = uidna_IDNToUnicode(ustr, ulen, dest, 1024, UIDNA_USE_STD3_RULES, NULL, &err); + UIDNAInfo info = UIDNA_INFO_INITIALIZER; + dest_len = uidna_nameToUnicode(icu_idna2008, ustr, ulen, dest, 1024, &info, &err); - if(U_FAILURE(err)) { + if(U_FAILURE(err) || info.errors) { lua_pushnil(L); return 1; } else { -- cgit v1.2.3 From bf1a0c2f0577c980394830a346a2637694ae2057 Mon Sep 17 00:00:00 2001 From: Kim Alvefur Date: Wed, 11 Sep 2019 00:40:30 +0200 Subject: util.encodings: Spell out all IDNA 2008 options ICU has --- util-src/encodings.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'util-src/encodings.c') diff --git a/util-src/encodings.c b/util-src/encodings.c index f20ba75d..6f2676f2 100644 --- a/util-src/encodings.c +++ b/util-src/encodings.c @@ -341,7 +341,30 @@ void init_icu(void) { icu_saslprep = usprep_openByType(USPREP_RFC4013_SASLPREP, &err); icu_spoofcheck = uspoof_open(&err); uspoof_setChecks(icu_spoofcheck, USPOOF_CONFUSABLE, &err); - icu_idna2008 = uidna_openUTS46(UIDNA_USE_STD3_RULES, &err); + int options = UIDNA_DEFAULT; +#if 0 + /* COMPAT with future Unicode versions */ + options |= UIDNA_ALLOW_UNASSIGNED; +#endif +#if 1 + /* Forbid eg labels starting with _ */ + options |= UIDNA_USE_STD3_RULES; +#endif +#if 0 + /* TODO determine if we need this */ + options |= UIDNA_CHECK_BIDI; +#endif +#if 0 + /* UTS46 makes it sound like these are the responsibility of registrars */ + options |= UIDNA_CHECK_CONTEXTJ; + options |= UIDNA_CHECK_CONTEXTO; +#endif +#if 0 + /* This disables COMPAT with IDNA 2003 */ + options |= UIDNA_NONTRANSITIONAL_TO_ASCII; + options |= UIDNA_NONTRANSITIONAL_TO_UNICODE; +#endif + icu_idna2008 = uidna_openUTS46(options, &err); if(U_FAILURE(err)) { fprintf(stderr, "[c] util.encodings: error: %s\n", u_errorName((UErrorCode)err)); -- cgit v1.2.3 From 41a40ab74b17a6a7fea165e5c63b76131b2e36e7 Mon Sep 17 00:00:00 2001 From: Kim Alvefur Date: Wed, 24 Apr 2019 15:01:00 +0200 Subject: 
util.encodings: Optional strict flag to stringprep --- util-src/encodings.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'util-src/encodings.c') diff --git a/util-src/encodings.c b/util-src/encodings.c index 6f2676f2..be34032e 100644 --- a/util-src/encodings.c +++ b/util-src/encodings.c @@ -276,6 +276,7 @@ static int icu_stringprep_prep(lua_State *L, const UStringPrepProfile *profile) int32_t unprepped_len, prepped_len, output_len; const char *input; char output[1024]; + int flags = USPREP_ALLOW_UNASSIGNED; UChar unprepped[1024]; /* Temporary unicode buffer (1024 characters) */ UChar prepped[1024]; @@ -294,6 +295,11 @@ static int icu_stringprep_prep(lua_State *L, const UStringPrepProfile *profile) return 1; } + /* strict */ + if(lua_toboolean(L, 2)) { + flags = 0; + } + u_strFromUTF8(unprepped, 1024, &unprepped_len, input, input_len, &err); if(U_FAILURE(err)) { @@ -301,7 +307,7 @@ static int icu_stringprep_prep(lua_State *L, const UStringPrepProfile *profile) return 1; } - prepped_len = usprep_prepare(profile, unprepped, unprepped_len, prepped, 1024, USPREP_ALLOW_UNASSIGNED, NULL, &err); + prepped_len = usprep_prepare(profile, unprepped, unprepped_len, prepped, 1024, flags, NULL, &err); if(U_FAILURE(err)) { lua_pushnil(L); @@ -397,6 +403,7 @@ static int stringprep_prep(lua_State *L, const Stringprep_profile *profile) { const char *s; char string[1024]; int ret; + Stringprep_profile_flags flags = 0; if(!lua_isstring(L, 1)) { lua_pushnil(L); @@ -405,13 +412,18 @@ static int stringprep_prep(lua_State *L, const Stringprep_profile *profile) { s = check_utf8(L, 1, &len); + /* strict */ + if(lua_toboolean(L, 2)) { + flags = STRINGPREP_NO_UNASSIGNED; + } + if(s == NULL || len >= 1024 || len != strlen(s)) { lua_pushnil(L); return 1; /* TODO return error message */ } strcpy(string, s); - ret = stringprep(string, 1024, (Stringprep_profile_flags)0, profile); + ret = stringprep(string, 1024, flags, profile); if(ret == STRINGPREP_OK) { 
lua_pushstring(L, string); -- cgit v1.2.3 From 42aeda373c62b60224608b188b4a776afe534511 Mon Sep 17 00:00:00 2001 From: Kim Alvefur Date: Wed, 30 Oct 2019 16:22:44 +0100 Subject: util.encodings: Strictly verify that the 'strict' *prep argument is a boolean This is to prevent mistakes like nodeprep(username:gsub("a","b")) from unintentionally invoking strict mode. --- util-src/encodings.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'util-src/encodings.c') diff --git a/util-src/encodings.c b/util-src/encodings.c index be34032e..4fe83c64 100644 --- a/util-src/encodings.c +++ b/util-src/encodings.c @@ -296,8 +296,11 @@ static int icu_stringprep_prep(lua_State *L, const UStringPrepProfile *profile) } /* strict */ - if(lua_toboolean(L, 2)) { - flags = 0; + if(!lua_isnoneornil(L, 2)) { + luaL_checktype(L, 2, LUA_TBOOLEAN); + if(lua_toboolean(L, 2)) { + flags = 0; + } } u_strFromUTF8(unprepped, 1024, &unprepped_len, input, input_len, &err); @@ -413,8 +416,11 @@ static int stringprep_prep(lua_State *L, const Stringprep_profile *profile) { s = check_utf8(L, 1, &len); /* strict */ - if(lua_toboolean(L, 2)) { - flags = STRINGPREP_NO_UNASSIGNED; + if(!lua_isnoneornil(L, 2)) { + luaL_checktype(L, 2, LUA_TBOOLEAN); + if(lua_toboolean(L, 2)) { + flags = STRINGPREP_NO_UNASSIGNED; + } } if(s == NULL || len >= 1024 || len != strlen(s)) { -- cgit v1.2.3 From b679ffe808759514c5dc12151ea7ff28c17c43a1 Mon Sep 17 00:00:00 2001 From: Kim Alvefur Date: Sat, 2 Nov 2019 13:09:54 +0100 Subject: util.encodings: Don't ignore non-strings passed to stringprep functions If you manage to pass a table or something weird to these, you deserve to know. 
--- util-src/encodings.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'util-src/encodings.c') diff --git a/util-src/encodings.c b/util-src/encodings.c index 4fe83c64..367182b6 100644 --- a/util-src/encodings.c +++ b/util-src/encodings.c @@ -283,12 +283,7 @@ static int icu_stringprep_prep(lua_State *L, const UStringPrepProfile *profile) UErrorCode err = U_ZERO_ERROR; - if(!lua_isstring(L, 1)) { - lua_pushnil(L); - return 1; - } - - input = lua_tolstring(L, 1, &input_len); + input = luaL_checklstring(L, 1, &input_len); if(input_len >= 1024) { lua_pushnil(L); @@ -408,11 +403,6 @@ static int stringprep_prep(lua_State *L, const Stringprep_profile *profile) { int ret; Stringprep_profile_flags flags = 0; - if(!lua_isstring(L, 1)) { - lua_pushnil(L); - return 1; - } - s = check_utf8(L, 1, &len); /* strict */ -- cgit v1.2.3