diff options
author | Matthew Wild <mwild1@gmail.com> | 2024-02-23 12:08:37 +0000 |
---|---|---|
committer | Matthew Wild <mwild1@gmail.com> | 2024-02-23 12:08:37 +0000 |
commit | 7f748556a2e4bc189bc994707138d129af2100a4 (patch) | |
tree | 2995372728f2a34844c2026a46faf78ce5851ee1 | |
parent | 1606675762e0ac0d9609d6d5b1d69492d2e29abb (diff) | |
download | prosody-7f748556a2e4bc189bc994707138d129af2100a4.tar.gz prosody-7f748556a2e4bc189bc994707138d129af2100a4.zip |
util.strbitop: Add common_prefix_bits() method
This returns the number of leading bits that two strings have in common, i.e.
the length of their common bit prefix. It is significantly more efficient than
performing a similar calculation in Lua.
-rw-r--r-- | spec/util_strbitop_spec.lua | 44 | ||||
-rw-r--r-- | util-src/strbitop.c | 37 |
2 files changed, 81 insertions, 0 deletions
diff --git a/spec/util_strbitop_spec.lua b/spec/util_strbitop_spec.lua
index 58a13772..fdb21414 100644
--- a/spec/util_strbitop_spec.lua
+++ b/spec/util_strbitop_spec.lua
@@ -38,4 +38,48 @@ describe("util.strbitop", function ()
 			assert.equal("hello", strbitop.sxor("hello", ""));
 		end);
 	end);
+
+	describe("common_prefix_bits()", function ()
+		local function B(s) -- pack a string of '0'/'1' characters (most significant bit first) into raw bytes
+			assert(#s%8==0, "Invalid test input: B(s): s should be a multiple of 8 bits in length");
+			local byte = 0;
+			local out_str = {};
+			for i = 1, #s do
+				local bit_ascii = s:byte(i);
+				if bit_ascii == 49 then -- '1'
+					byte = byte + 2^((7-(i-1))%8);
+				elseif bit_ascii ~= 48 then -- anything other than '0'
+					error("Invalid test input: B(s): s should contain only '0' or '1' characters");
+				end
+				if (i-1)%8 == 7 then -- eighth bit consumed: emit the finished byte
+					table.insert(out_str, string.char(byte));
+					byte = 0;
+				end
+			end
+			return table.concat(out_str);
+		end
+		-- test(a, b): call common_prefix_bits() on B(a) and B(b) in both argument orders, require equal results, return the result
+		local _cpb = strbitop.common_prefix_bits;
+		local function test(a, b)
+			local Ba, Bb = B(a), B(b);
+			local ret1 = _cpb(Ba, Bb);
+			local ret2 = _cpb(Bb, Ba);
+			assert(ret1 == ret2, ("parameter order should not make a difference to the result (%s, %s) = %d, reversed = %d"):format(a, b, ret1, ret2));
+			return ret1;
+		end
+
+		it("works on single bytes", function ()
+			assert.equal(0, test("00000000", "11111111"));
+			assert.equal(1, test("10000000", "11111111"));
+			assert.equal(0, test("01000000", "11111111"));
+			assert.equal(7, test("11111110", "11111111")); -- only the lowest bit differs
+			assert.equal(8, test("11111111", "11111111"));
+		end);
+
+		it("works on multiple bytes", function ()
+			for i = 0, 16 do -- i leading '1' bits shared with an all-ones 16-bit string
+				assert.equal(i, test(string.rep("1", i)..string.rep("0", 16-i), "1111111111111111"));
+			end
+		end);
+	end);
 end);
diff --git a/util-src/strbitop.c b/util-src/strbitop.c
index 75cfea81..2f6bf6e6 100644
--- a/util-src/strbitop.c
+++ b/util-src/strbitop.c
@@ -8,6 +8,8 @@
 
 #include <lua.h>
 #include <lauxlib.h>
+#include <sys/param.h>
+#include <limits.h>
 
 /* TODO Deduplicate code somehow */
 
@@ -74,11 +76,46 @@ static int
strop_xor(lua_State *L) {
 	return 1;
 }
 
+static unsigned int clz(unsigned char c) { /* number of leading zero bits in one byte; 8 when c == 0 */
+#if defined(__GNUC__)
+	return c ? __builtin_clz((unsigned int) c) - ((sizeof(int)-1)*CHAR_BIT) : 8; /* __builtin_clz(0) is undefined, so handle 0 explicitly */
+#else
+	if(c & 0x80) return 0;
+	if(c & 0x40) return 1;
+	if(c & 0x20) return 2;
+	if(c & 0x10) return 3;
+	if(c & 0x08) return 4;
+	if(c & 0x04) return 5;
+	if(c & 0x02) return 6;
+	if(c & 0x01) return 7;
+	return 8;
+#endif
+}
+/* Lua: common_prefix_bits(a, b) -> number of leading bits on which the strings a and b agree */
+static int strop_common_prefix_bits(lua_State *L) {
+	size_t a, b, i;
+	const char *str_a = luaL_checklstring(L, 1, &a);
+	const char *str_b = luaL_checklstring(L, 2, &b);
+	/* only the first min_len bytes of each string are compared */
+	size_t min_len = MIN(a, b);
+
+	for(i=0; i<min_len; i++) {
+		if(str_a[i] != str_b[i]) {
+			lua_pushinteger(L, i*8 + (clz(str_a[i] ^ str_b[i]))); /* whole matching bytes, plus matching high bits of the first differing byte */
+			return 1;
+		}
+	}
+	/* no differing byte found: all min_len compared bytes are identical */
+	lua_pushinteger(L, i*8);
+	return 1;
+}
+
 LUA_API int luaopen_prosody_util_strbitop(lua_State *L) {
 	luaL_Reg exports[] = {
 		{ "sand", strop_and },
 		{ "sor", strop_or },
 		{ "sxor", strop_xor },
+		{ "common_prefix_bits", strop_common_prefix_bits },
 		{ NULL, NULL }
 	};
 
|