aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Matthew Wild <mwild1@gmail.com> 2024-02-23 12:08:37 +0000
committer Matthew Wild <mwild1@gmail.com> 2024-02-23 12:08:37 +0000
commit 7f748556a2e4bc189bc994707138d129af2100a4 (patch)
tree 2995372728f2a34844c2026a46faf78ce5851ee1
parent 1606675762e0ac0d9609d6d5b1d69492d2e29abb (diff)
download prosody-7f748556a2e4bc189bc994707138d129af2100a4.tar.gz
prosody-7f748556a2e4bc189bc994707138d129af2100a4.zip
util.strbitop: Add common_prefix_bits() method
This returns the number of leading bits that two strings have in common, i.e. the length of their shared bit prefix. It is significantly more efficient than performing the equivalent calculation in Lua.
-rw-r--r--spec/util_strbitop_spec.lua44
-rw-r--r--util-src/strbitop.c37
2 files changed, 81 insertions, 0 deletions
diff --git a/spec/util_strbitop_spec.lua b/spec/util_strbitop_spec.lua
index 58a13772..fdb21414 100644
--- a/spec/util_strbitop_spec.lua
+++ b/spec/util_strbitop_spec.lua
@@ -38,4 +38,48 @@ describe("util.strbitop", function ()
assert.equal("hello", strbitop.sxor("hello", ""));
end);
end);
+
+	describe("common_prefix_bits()", function ()
+		-- Convert a string of ASCII '0'/'1' characters (most significant bit
+		-- first, length a multiple of 8) into the binary string it spells out.
+		local function B(s)
+			assert(#s%8==0, "Invalid test input: B(s): s should be a multiple of 8 bits in length");
+			local byte = 0;
+			local out_str = {};
+			for i = 1, #s do
+				local bit_pos = (i-1)%8; -- 0 is the most significant bit of the current byte
+				local bit_ascii = s:byte(i);
+				if bit_ascii == 49 then -- '1'
+					byte = byte + 2^(7-bit_pos);
+				elseif bit_ascii ~= 48 then -- anything other than '0'
+					error("Invalid test input: B(s): s should contain only '0' or '1' characters");
+				end
+				if bit_pos == 7 then
+					-- A full byte has been accumulated, flush it
+					table.insert(out_str, string.char(byte));
+					byte = 0;
+				end
+			end
+			return table.concat(out_str);
+		end
+
+		local _cpb = strbitop.common_prefix_bits;
+
+		-- Run common_prefix_bits() on two bit-strings in both argument orders,
+		-- check that the order is irrelevant, and return the result.
+		local function test(a, b)
+			local Ba, Bb = B(a), B(b);
+			local ret1 = _cpb(Ba, Bb);
+			local ret2 = _cpb(Bb, Ba);
+			assert(ret1 == ret2, ("parameter order should not make a difference to the result (%s, %s) = %d, reversed = %d"):format(a, b, ret1, ret2));
+			return ret1;
+		end
+
+		it("works on single bytes", function ()
+			assert.equal(0, test("00000000", "11111111"));
+			assert.equal(1, test("10000000", "11111111"));
+			assert.equal(0, test("01000000", "11111111"));
+			assert.equal(7, test("11111110", "11111111"));
+			assert.equal(8, test("11111111", "11111111"));
+		end);
+
+		it("works on multiple bytes", function ()
+			for i = 0, 16 do
+				assert.equal(i, test(string.rep("1", i)..string.rep("0", 16-i), "1111111111111111"));
+			end
+		end);
+
+		it("only compares up to the length of the shorter string", function ()
+			assert.equal(0, test("", "11111111"));
+			assert.equal(8, test("11111111", "1111111100000000"));
+			assert.equal(12, test("1111111111110000", "111111111111111111111111"));
+		end);
+	end);
end);
diff --git a/util-src/strbitop.c b/util-src/strbitop.c
index 75cfea81..2f6bf6e6 100644
--- a/util-src/strbitop.c
+++ b/util-src/strbitop.c
@@ -8,6 +8,8 @@
#include <lua.h>
#include <lauxlib.h>
+#include <sys/param.h>
+#include <limits.h>
/* TODO Deduplicate code somehow */
@@ -74,11 +76,46 @@ static int strop_xor(lua_State *L) {
return 1;
}
+/*
+ * Count the leading zero bits of a single byte.
+ *
+ * Returns a value in the range 0..8, where 8 means that no bit is set.
+ */
+static unsigned int clz(unsigned char c) {
+	/* __builtin_clz(0) is undefined, so handle the all-zero byte up front.
+	 * This also keeps both implementations below in agreement for every input. */
+	if(c == 0) return 8;
+#if defined(__GNUC__)
+	/* __builtin_clz() counts within a whole unsigned int; subtract the bits
+	 * that sit above the low byte to get the count within the byte itself. */
+	return (unsigned int) __builtin_clz((unsigned int) c) - (unsigned int) ((sizeof(unsigned int)-1)*CHAR_BIT);
+#else
+	if(c & 0x80) return 0;
+	if(c & 0x40) return 1;
+	if(c & 0x20) return 2;
+	if(c & 0x10) return 3;
+	if(c & 0x08) return 4;
+	if(c & 0x04) return 5;
+	if(c & 0x02) return 6;
+	return 7; /* only bit 0 can remain, c == 0 was handled above */
+#endif
+}
+
+/*
+ * common_prefix_bits(a, b)
+ *
+ * Takes two Lua strings and pushes one integer: the number of leading bits
+ * (counted from the most significant bit of the first byte) on which the two
+ * strings agree. Only the first min(#a, #b) bytes are compared, so the result
+ * never exceeds eight times the length of the shorter string.
+ */
+static int strop_common_prefix_bits(lua_State *L) {
+	size_t a, b, i;
+	const char *str_a = luaL_checklstring(L, 1, &a);
+	const char *str_b = luaL_checklstring(L, 2, &b);
+
+	/* Plain comparison instead of MIN(), which comes from the non-standard <sys/param.h> */
+	size_t min_len = (a < b) ? a : b;
+
+	for(i=0; i<min_len; i++) {
+		if(str_a[i] != str_b[i]) {
+			/* The XOR has a 1 bit wherever the two bytes differ, so its
+			 * leading zeros are the bits this pair of bytes still shares. */
+			unsigned char diff = (unsigned char) (str_a[i] ^ str_b[i]);
+			lua_pushinteger(L, (lua_Integer) (i*8 + clz(diff)));
+			return 1;
+		}
+	}
+
+	/* No difference within the compared range; i == min_len at this point */
+	lua_pushinteger(L, (lua_Integer) (i*8));
+	return 1;
+}
+
LUA_API int luaopen_prosody_util_strbitop(lua_State *L) {
luaL_Reg exports[] = {
{ "sand", strop_and },
{ "sor", strop_or },
{ "sxor", strop_xor },
+ { "common_prefix_bits", strop_common_prefix_bits },
{ NULL, NULL }
};