aboutsummaryrefslogtreecommitdiffstats
path: root/util-src/encodings.c
diff options
context:
space:
mode:
authorKim Alvefur <zash@zash.se>2020-11-05 22:31:25 +0100
committerKim Alvefur <zash@zash.se>2020-11-05 22:31:25 +0100
commit0c94d96263cffcdbe0cf5ea59ef0b172b32258c2 (patch)
tree58547de6e7795740633c1b93e67c217eb621fe8f /util-src/encodings.c
parent20cb21003f0374e7078e1a29ffb36a7028c6b9ef (diff)
parent4afbfc6854ebc374acc34729fdc6e472b44b07f1 (diff)
downloadprosody-0c94d96263cffcdbe0cf5ea59ef0b172b32258c2.tar.gz
prosody-0c94d96263cffcdbe0cf5ea59ef0b172b32258c2.zip
Merge 0.11->trunk
Diffstat (limited to 'util-src/encodings.c')
-rw-r--r--util-src/encodings.c169
1 files changed, 129 insertions, 40 deletions
diff --git a/util-src/encodings.c b/util-src/encodings.c
index e55a3f44..72264da8 100644
--- a/util-src/encodings.c
+++ b/util-src/encodings.c
@@ -24,6 +24,9 @@
#if (LUA_VERSION_NUM == 501)
#define luaL_setfuncs(L, R, N) luaL_register(L, NULL, R)
#endif
+#if (LUA_VERSION_NUM < 504)
+#define luaL_pushfail lua_pushnil
+#endif
/***************** BASE64 *****************/
@@ -216,7 +219,7 @@ static const char *utf8_decode(const char *o, int *val) {
* Check that a string is valid UTF-8
* Returns NULL if not
*/
-const char *check_utf8(lua_State *L, int idx, size_t *l) {
+static const char *check_utf8(lua_State *L, int idx, size_t *l) {
size_t pos, len;
const char *s = luaL_checklstring(L, idx, &len);
pos = 0;
@@ -247,7 +250,7 @@ static int Lutf8_length(lua_State *L) {
size_t len;
if(!check_utf8(L, 1, &len)) {
- lua_pushnil(L);
+ luaL_pushfail(L);
lua_pushliteral(L, "invalid utf8");
return 2;
}
@@ -268,41 +271,47 @@ static const luaL_Reg Reg_utf8[] = {
#include <unicode/usprep.h>
#include <unicode/ustring.h>
#include <unicode/utrace.h>
+#include <unicode/uspoof.h>
+#include <unicode/uidna.h>
static int icu_stringprep_prep(lua_State *L, const UStringPrepProfile *profile) {
size_t input_len;
int32_t unprepped_len, prepped_len, output_len;
const char *input;
char output[1024];
+ int flags = USPREP_ALLOW_UNASSIGNED;
UChar unprepped[1024]; /* Temporary unicode buffer (1024 characters) */
UChar prepped[1024];
UErrorCode err = U_ZERO_ERROR;
- if(!lua_isstring(L, 1)) {
- lua_pushnil(L);
- return 1;
- }
-
- input = lua_tolstring(L, 1, &input_len);
+ input = luaL_checklstring(L, 1, &input_len);
if(input_len >= 1024) {
- lua_pushnil(L);
+ luaL_pushfail(L);
return 1;
}
+ /* strict */
+ if(!lua_isnoneornil(L, 2)) {
+ luaL_checktype(L, 2, LUA_TBOOLEAN);
+ if(lua_toboolean(L, 2)) {
+ flags = 0;
+ }
+ }
+
u_strFromUTF8(unprepped, 1024, &unprepped_len, input, input_len, &err);
if(U_FAILURE(err)) {
- lua_pushnil(L);
+ luaL_pushfail(L);
return 1;
}
- prepped_len = usprep_prepare(profile, unprepped, unprepped_len, prepped, 1024, USPREP_ALLOW_UNASSIGNED, NULL, &err);
+ prepped_len = usprep_prepare(profile, unprepped, unprepped_len, prepped, 1024, flags, NULL, &err);
if(U_FAILURE(err)) {
- lua_pushnil(L);
+ luaL_pushfail(L);
return 1;
} else {
u_strToUTF8(output, 1024, &output_len, prepped, prepped_len, &err);
@@ -310,29 +319,62 @@ static int icu_stringprep_prep(lua_State *L, const UStringPrepProfile *profile)
if(U_SUCCESS(err) && output_len < 1024) {
lua_pushlstring(L, output, output_len);
} else {
- lua_pushnil(L);
+ luaL_pushfail(L);
}
return 1;
}
}
-UStringPrepProfile *icu_nameprep;
-UStringPrepProfile *icu_nodeprep;
-UStringPrepProfile *icu_resourceprep;
-UStringPrepProfile *icu_saslprep;
+static UStringPrepProfile *icu_nameprep;
+static UStringPrepProfile *icu_nodeprep;
+static UStringPrepProfile *icu_resourceprep;
+static UStringPrepProfile *icu_saslprep;
+static USpoofChecker *icu_spoofcheck;
+static UIDNA *icu_idna2008;
+
+#if (U_ICU_VERSION_MAJOR_NUM < 58)
+/* COMPAT */
+#define USPOOF_CONFUSABLE (USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_WHOLE_SCRIPT_CONFUSABLE)
+#endif
/* initialize global ICU stringprep profiles */
-void init_icu() {
+static void init_icu(void) {
UErrorCode err = U_ZERO_ERROR;
utrace_setLevel(UTRACE_VERBOSE);
icu_nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, &err);
icu_nodeprep = usprep_openByType(USPREP_RFC3920_NODEPREP, &err);
icu_resourceprep = usprep_openByType(USPREP_RFC3920_RESOURCEPREP, &err);
icu_saslprep = usprep_openByType(USPREP_RFC4013_SASLPREP, &err);
+ icu_spoofcheck = uspoof_open(&err);
+ uspoof_setChecks(icu_spoofcheck, USPOOF_CONFUSABLE, &err);
+ int options = UIDNA_DEFAULT;
+#if 0
+ /* COMPAT with future Unicode versions */
+ options |= UIDNA_ALLOW_UNASSIGNED;
+#endif
+#if 1
+ /* Forbid eg labels starting with _ */
+ options |= UIDNA_USE_STD3_RULES;
+#endif
+#if 0
+ /* TODO determine if we need this */
+ options |= UIDNA_CHECK_BIDI;
+#endif
+#if 0
+ /* UTS46 makes it sound like these are the responsibility of registrars */
+ options |= UIDNA_CHECK_CONTEXTJ;
+ options |= UIDNA_CHECK_CONTEXTO;
+#endif
+#if 0
+ /* This disables COMPAT with IDNA 2003 */
+ options |= UIDNA_NONTRANSITIONAL_TO_ASCII;
+ options |= UIDNA_NONTRANSITIONAL_TO_UNICODE;
+#endif
+ icu_idna2008 = uidna_openUTS46(options, &err);
if(U_FAILURE(err)) {
- fprintf(stderr, "[c] util.encodings: error: %s\n", u_errorName((UErrorCode)err));
+ fprintf(stderr, "[c] util.encodings: error: %s\n", u_errorName(err));
}
}
@@ -362,27 +404,31 @@ static int stringprep_prep(lua_State *L, const Stringprep_profile *profile) {
const char *s;
char string[1024];
int ret;
-
- if(!lua_isstring(L, 1)) {
- lua_pushnil(L);
- return 1;
- }
+ Stringprep_profile_flags flags = 0;
s = check_utf8(L, 1, &len);
+ /* strict */
+ if(!lua_isnoneornil(L, 2)) {
+ luaL_checktype(L, 2, LUA_TBOOLEAN);
+ if(lua_toboolean(L, 2)) {
+ flags = STRINGPREP_NO_UNASSIGNED;
+ }
+ }
+
if(s == NULL || len >= 1024 || len != strlen(s)) {
- lua_pushnil(L);
+ luaL_pushfail(L);
return 1; /* TODO return error message */
}
strcpy(string, s);
- ret = stringprep(string, 1024, (Stringprep_profile_flags)0, profile);
+ ret = stringprep(string, 1024, flags, profile);
if(ret == STRINGPREP_OK) {
lua_pushstring(L, string);
return 1;
} else {
- lua_pushnil(L);
+ luaL_pushfail(L);
return 1; /* TODO return error message */
}
}
@@ -421,14 +467,15 @@ static int Lidna_to_ascii(lua_State *L) { /** idna.to_ascii(s) */
u_strFromUTF8(ustr, 1024, &ulen, s, len, &err);
if(U_FAILURE(err)) {
- lua_pushnil(L);
+ luaL_pushfail(L);
return 1;
}
- dest_len = uidna_IDNToASCII(ustr, ulen, dest, 1024, UIDNA_USE_STD3_RULES, NULL, &err);
+ UIDNAInfo info = UIDNA_INFO_INITIALIZER;
+ dest_len = uidna_nameToASCII(icu_idna2008, ustr, ulen, dest, 256, &info, &err);
- if(U_FAILURE(err)) {
- lua_pushnil(L);
+ if(U_FAILURE(err) || info.errors) {
+ luaL_pushfail(L);
return 1;
} else {
u_strToUTF8(output, 1024, &output_len, dest, dest_len, &err);
@@ -436,7 +483,7 @@ static int Lidna_to_ascii(lua_State *L) { /** idna.to_ascii(s) */
if(U_SUCCESS(err) && output_len < 1024) {
lua_pushlstring(L, output, output_len);
} else {
- lua_pushnil(L);
+ luaL_pushfail(L);
}
return 1;
@@ -455,14 +502,15 @@ static int Lidna_to_unicode(lua_State *L) { /** idna.to_unicode(s) */
u_strFromUTF8(ustr, 1024, &ulen, s, len, &err);
if(U_FAILURE(err)) {
- lua_pushnil(L);
+ luaL_pushfail(L);
return 1;
}
- dest_len = uidna_IDNToUnicode(ustr, ulen, dest, 1024, UIDNA_USE_STD3_RULES, NULL, &err);
+ UIDNAInfo info = UIDNA_INFO_INITIALIZER;
+ dest_len = uidna_nameToUnicode(icu_idna2008, ustr, ulen, dest, 1024, &info, &err);
- if(U_FAILURE(err)) {
- lua_pushnil(L);
+ if(U_FAILURE(err) || info.errors) {
+ luaL_pushfail(L);
return 1;
} else {
u_strToUTF8(output, 1024, &output_len, dest, dest_len, &err);
@@ -470,13 +518,47 @@ static int Lidna_to_unicode(lua_State *L) { /** idna.to_unicode(s) */
if(U_SUCCESS(err) && output_len < 1024) {
lua_pushlstring(L, output, output_len);
} else {
- lua_pushnil(L);
+ luaL_pushfail(L);
}
return 1;
}
}
+static int Lskeleton(lua_State *L) {
+ size_t len;
+ int32_t ulen, dest_len, output_len;
+ const char *s = luaL_checklstring(L, 1, &len);
+ UErrorCode err = U_ZERO_ERROR;
+ UChar ustr[1024];
+ UChar dest[1024];
+ char output[1024];
+
+ u_strFromUTF8(ustr, 1024, &ulen, s, len, &err);
+
+ if(U_FAILURE(err)) {
+ luaL_pushfail(L);
+ return 1;
+ }
+
+ dest_len = uspoof_getSkeleton(icu_spoofcheck, 0, ustr, ulen, dest, 1024, &err);
+
+ if(U_FAILURE(err)) {
+ luaL_pushfail(L);
+ return 1;
+ }
+
+ u_strToUTF8(output, 1024, &output_len, dest, dest_len, &err);
+
+ if(U_SUCCESS(err)) {
+ lua_pushlstring(L, output, output_len);
+ return 1;
+ }
+
+ luaL_pushfail(L);
+ return 1;
+}
+
#else /* USE_STRINGPREP_ICU */
/****************** libidn ********************/
@@ -490,7 +572,7 @@ static int Lidna_to_ascii(lua_State *L) { /** idna.to_ascii(s) */
int ret;
if(s == NULL || len != strlen(s)) {
- lua_pushnil(L);
+ luaL_pushfail(L);
return 1; /* TODO return error message */
}
@@ -501,7 +583,7 @@ static int Lidna_to_ascii(lua_State *L) { /** idna.to_ascii(s) */
idn_free(output);
return 1;
} else {
- lua_pushnil(L);
+ luaL_pushfail(L);
idn_free(output);
return 1; /* TODO return error message */
}
@@ -518,7 +600,7 @@ static int Lidna_to_unicode(lua_State *L) { /** idna.to_unicode(s) */
idn_free(output);
return 1;
} else {
- lua_pushnil(L);
+ luaL_pushfail(L);
idn_free(output);
return 1; /* TODO return error message */
}
@@ -558,6 +640,13 @@ LUALIB_API int luaopen_util_encodings(lua_State *L) {
luaL_setfuncs(L, Reg_utf8, 0);
lua_setfield(L, -2, "utf8");
+#ifdef USE_STRINGPREP_ICU
+ lua_newtable(L);
+ lua_pushcfunction(L, Lskeleton);
+ lua_setfield(L, -2, "skeleton");
+ lua_setfield(L, -2, "confusable");
+#endif
+
lua_pushliteral(L, "-3.14");
lua_setfield(L, -2, "version");
return 1;