# HG changeset patch
# User Matthew Wild
# Date 1708690117 0
# Node ID 6cdc6923d65a066b4f4747e5c6c06611024dc1d3
# Parent dc1ad5f3f59752beadab3028b9a33bcb1a5f16ad
util.strbitop: Add common_prefix_bits() method

This returns the number of leading bits that two strings have in common. It is
significantly more efficient than similar calculations in Lua.

diff -r dc1ad5f3f597 -r 6cdc6923d65a spec/util_strbitop_spec.lua
--- a/spec/util_strbitop_spec.lua	Fri Feb 23 12:03:31 2024 +0000
+++ b/spec/util_strbitop_spec.lua	Fri Feb 23 12:08:37 2024 +0000
@@ -38,4 +38,52 @@
 			assert.equal("hello", strbitop.sxor("hello", ""));
 		end);
 	end);
+
+	describe("common_prefix_bits()", function ()
+		-- Build a binary string from a string of '0'/'1' characters, most
+		-- significant bit first; the input length must be a multiple of 8.
+		local function B(s)
+			assert(#s%8==0, "Invalid test input: B(s): s should be a multiple of 8 bits in length");
+			local byte = 0;
+			local out_str = {};
+			for i = 1, #s do
+				local bit_ascii = s:byte(i);
+				if bit_ascii == 49 then -- '1'
+					byte = byte + 2^((7-(i-1))%8);
+				elseif bit_ascii ~= 48 then
+					error("Invalid test input: B(s): s should contain only '0' or '1' characters");
+				end
+				if (i-1)%8 == 7 then
+					table.insert(out_str, string.char(byte));
+					byte = 0;
+				end
+			end
+			return table.concat(out_str);
+		end
+
+		local _cpb = strbitop.common_prefix_bits;
+		-- Call common_prefix_bits() with both argument orders and require the
+		-- two results to agree before returning the value.
+		local function test(a, b)
+			local Ba, Bb = B(a), B(b);
+			local ret1 = _cpb(Ba, Bb);
+			local ret2 = _cpb(Bb, Ba);
+			assert(ret1 == ret2, ("parameter order should not make a difference to the result (%s, %s) = %d, reversed = %d"):format(a, b, ret1, ret2));
+			return ret1;
+		end
+
+		it("works on single bytes", function ()
+			assert.equal(0, test("00000000", "11111111"));
+			assert.equal(1, test("10000000", "11111111"));
+			assert.equal(0, test("01000000", "11111111"));
+			assert.equal(2, test("11000000", "11111111"));
+			assert.equal(8, test("11111111", "11111111"));
+		end);
+
+		it("works on multiple bytes", function ()
+			for i = 0, 16 do
+				assert.equal(i, test(string.rep("1", i)..string.rep("0", 16-i), "1111111111111111"));
+			end
+		end);
+	end);
 end);
diff -r dc1ad5f3f597 -r 6cdc6923d65a util-src/strbitop.c
--- a/util-src/strbitop.c	Fri Feb 23 12:03:31 2024 +0000
+++ b/util-src/strbitop.c	Fri Feb 23 12:08:37 2024 +0000
@@ -8,6 +8,8 @@
 
 #include <lua.h>
 #include <lauxlib.h>
+#include <sys/param.h>
+#include <limits.h>
 
 /* TODO Deduplicate code somehow */
 
@@ -74,11 +76,52 @@
 	return 1;
 }
 
+/*
+ * Count the leading zero bits of a single byte.
+ * Returns 0..7 for a non-zero byte, or 8 when c is 0.
+ */
+static unsigned int clz(unsigned char c) {
+#if defined(__GNUC__)
+	/* __builtin_clz(0) is undefined; answer the all-zero byte explicitly */
+	if(c == 0) return 8;
+	return __builtin_clz((unsigned int) c) - ((sizeof(int)-1)*CHAR_BIT);
+#else
+	if(c & 0x80) return 0;
+	if(c & 0x40) return 1;
+	if(c & 0x20) return 2;
+	if(c & 0x10) return 3;
+	if(c & 0x08) return 4;
+	if(c & 0x04) return 5;
+	if(c & 0x02) return 6;
+	if(c & 0x01) return 7;
+	return 8;
+#endif
+}
+
+LUA_API int strop_common_prefix_bits(lua_State *L) {
+	size_t a, b, i;
+	const char *str_a = luaL_checklstring(L, 1, &a);
+	const char *str_b = luaL_checklstring(L, 2, &b);
+
+	size_t min_len = MIN(a, b);
+
+	for(i=0; i