From 7f748556a2e4bc189bc994707138d129af2100a4 Mon Sep 17 00:00:00 2001 From: Matthew Wild Date: Fri, 23 Feb 2024 12:08:37 +0000 Subject: util.strbitop: Add common_prefix_bits() method This returns the number of leading bits that two strings have in common, i.e. the length of their shared bit prefix. It is significantly more efficient than similar calculations in Lua. --- spec/util_strbitop_spec.lua | 44 ++++++++++++++++++++++++++++++++++++++++++++ util-src/strbitop.c | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+) diff --git a/spec/util_strbitop_spec.lua b/spec/util_strbitop_spec.lua index 58a13772..fdb21414 100644 --- a/spec/util_strbitop_spec.lua +++ b/spec/util_strbitop_spec.lua @@ -38,4 +38,48 @@ describe("util.strbitop", function () assert.equal("hello", strbitop.sxor("hello", "")); end); end); + + describe("common_prefix_bits()", function () + local function B(s) + assert(#s%8==0, "Invalid test input: B(s): s should be a multiple of 8 bits in length"); + local byte = 0; + local out_str = {}; + for i = 1, #s do + local bit_ascii = s:byte(i); + if bit_ascii == 49 then -- '1' + byte = byte + 2^((7-(i-1))%8); + elseif bit_ascii ~= 48 then + error("Invalid test input: B(s): s should contain only '0' or '1' characters"); + end + if (i-1)%8 == 7 then + table.insert(out_str, string.char(byte)); + byte = 0; + end + end + return table.concat(out_str); + end + + local _cpb = strbitop.common_prefix_bits; + local function test(a, b) + local Ba, Bb = B(a), B(b); + local ret1 = _cpb(Ba, Bb); + local ret2 = _cpb(Bb, Ba); + assert(ret1 == ret2, ("parameter order should not make a difference to the result (%s, %s) = %d, reversed = %d"):format(a, b, ret1, ret2)); + return ret1; + end + local hex = require "util.hex"; + it("works on single bytes", function () + assert.equal(0, test("00000000", "11111111")); + assert.equal(1, test("10000000", "11111111")); + assert.equal(0, test("01000000", "11111111")); + assert.equal(0, test("01000000", "11111111")); + assert.equal(8, test("11111111", "11111111")); + 
end); + + it("works on multiple bytes", function () + for i = 0, 16 do + assert.equal(i, test(string.rep("1", i)..string.rep("0", 16-i), "1111111111111111")); + end + end); + end); end); diff --git a/util-src/strbitop.c b/util-src/strbitop.c index 75cfea81..2f6bf6e6 100644 --- a/util-src/strbitop.c +++ b/util-src/strbitop.c @@ -8,6 +8,8 @@ #include <lua.h> #include <lauxlib.h> +#include <sys/param.h> +#include <limits.h> /* TODO Deduplicate code somehow */ @@ -74,11 +76,46 @@ static int strop_xor(lua_State *L) { return 1; } +unsigned int clz(unsigned char c) { +#if __GNUC__ + return __builtin_clz((unsigned int) c) - ((sizeof(int)-1)*CHAR_BIT); +#else + if(c & 0x80) return 0; + if(c & 0x40) return 1; + if(c & 0x20) return 2; + if(c & 0x10) return 3; + if(c & 0x08) return 4; + if(c & 0x04) return 5; + if(c & 0x02) return 6; + if(c & 0x01) return 7; + return 8; +#endif +} + +LUA_API int strop_common_prefix_bits(lua_State *L) { + size_t a, b, i; + const char *str_a = luaL_checklstring(L, 1, &a); + const char *str_b = luaL_checklstring(L, 2, &b); + + size_t min_len = MIN(a, b); + + for(i=0; i