util.strbitop: Add common_prefix_bits() method
author: Matthew Wild <mwild1@gmail.com>
Fri, 23 Feb 2024 12:08:37 +0000
changeset 13433 6cdc6923d65a
parent 13432 dc1ad5f3f597
child 13434 1a5e3cf037f6
util.strbitop: Add common_prefix_bits() method. This returns the number of leading bits that two strings have in common. It is significantly more efficient than similar calculations in Lua.
spec/util_strbitop_spec.lua
util-src/strbitop.c
--- a/spec/util_strbitop_spec.lua	Fri Feb 23 12:03:31 2024 +0000
+++ b/spec/util_strbitop_spec.lua	Fri Feb 23 12:08:37 2024 +0000
@@ -38,4 +38,48 @@
 			assert.equal("hello", strbitop.sxor("hello", ""));
 		end);
 	end);
+
+	describe("common_prefix_bits()", function ()
+		-- Test helper: turn a string of ASCII '0'/'1' characters (most
+		-- significant bit first, length a multiple of 8) into the binary
+		-- string those bits encode, one output byte per 8 input characters.
+		local function B(s)
+			assert(#s%8==0, "Invalid test input: B(s): s should be a multiple of 8 bits in length");
+			local byte = 0;
+			local out_str = {};
+			for i = 1, #s do
+				local bit_ascii = s:byte(i);
+				if bit_ascii == 49 then -- '1'
+					-- The first character of each group of 8 is the high bit (2^7)
+					byte = byte + 2^((7-(i-1))%8);
+				elseif bit_ascii ~= 48 then
+					error("Invalid test input: B(s): s should contain only '0' or '1' characters");
+				end
+				if (i-1)%8 == 7 then
+					-- Completed a byte; flush it and start the next one
+					table.insert(out_str, string.char(byte));
+					byte = 0;
+				end
+			end
+			return table.concat(out_str);
+		end
+
+		local _cpb = strbitop.common_prefix_bits;
+		-- Runs common_prefix_bits() on B(a) and B(b) in both argument orders,
+		-- checks that the two results agree, and returns the result.
+		local function test(a, b)
+			local Ba, Bb = B(a), B(b);
+			local ret1 = _cpb(Ba, Bb);
+			local ret2 = _cpb(Bb, Ba);
+			assert(ret1 == ret2, ("parameter order should not make a difference to the result (%s, %s) = %d, reversed = %d"):format(a, b, ret1, ret2));
+			return ret1;
+		end
+
+		it("works on single bytes", function ()
+			assert.equal(0, test("00000000", "11111111"));
+			assert.equal(1, test("10000000", "11111111"));
+			assert.equal(0, test("01000000", "11111111"));
+			assert.equal(2, test("11000000", "11111111"));
+			assert.equal(7, test("11111110", "11111111"));
+			assert.equal(8, test("11111111", "11111111"));
+		end);
+
+		it("works on multiple bytes", function ()
+			for i = 0, 16 do
+				assert.equal(i, test(string.rep("1", i)..string.rep("0", 16-i), "1111111111111111"));
+			end
+		end);
+
+		it("only compares up to the length of the shorter string", function ()
+			assert.equal(0, test("", ""));
+			assert.equal(0, test("", "11111111"));
+			assert.equal(8, test("11111111", "1111111111111111"));
+			assert.equal(3, test("11100000", "1111111111111111"));
+		end);
+	end);
 end);
--- a/util-src/strbitop.c	Fri Feb 23 12:03:31 2024 +0000
+++ b/util-src/strbitop.c	Fri Feb 23 12:08:37 2024 +0000
@@ -8,6 +8,8 @@
 #include <lua.h>
 #include <lauxlib.h>
 
+#include <sys/param.h>
+#include <limits.h>
 
 /* TODO Deduplicate code somehow */
 
@@ -74,11 +76,46 @@
 	return 1;
 }
 
+/*
+ * Count the leading zero bits of a single byte, starting from the most
+ * significant bit (0x80).
+ *
+ * Returns a value from 0 to 8; 8 means that no bit is set (c == 0).
+ */
+unsigned int clz(unsigned char c) {
+#if defined(__GNUC__)
+	/* __builtin_clz() has an undefined result for 0, so answer that case
+	 * directly and keep both implementations in agreement. */
+	if(c == 0) return 8;
+	/* The builtin counts across a whole unsigned int, so discount the
+	 * zero bits that sit above the low byte. */
+	return __builtin_clz((unsigned int) c) - ((sizeof(int)-1)*CHAR_BIT);
+#else
+	if(c & 0x80) return 0;
+	if(c & 0x40) return 1;
+	if(c & 0x20) return 2;
+	if(c & 0x10) return 3;
+	if(c & 0x08) return 4;
+	if(c & 0x04) return 5;
+	if(c & 0x02) return 6;
+	if(c & 0x01) return 7;
+	return 8;
+#endif
+}
+
+/*
+ * common_prefix_bits(a, b)
+ *
+ * Lua arguments: two strings. Pushes one integer: the number of leading
+ * bits that are identical in both strings. Only the bytes present in both
+ * strings are compared, so the result never exceeds 8 times the length of
+ * the shorter string.
+ */
+LUA_API int strop_common_prefix_bits(lua_State *L) {
+	size_t len_a, len_b;
+	const char *str_a = luaL_checklstring(L, 1, &len_a);
+	const char *str_b = luaL_checklstring(L, 2, &len_b);
+
+	/* Comparison stops at the end of the shorter string */
+	size_t limit = (len_a < len_b) ? len_a : len_b;
+	size_t pos = 0;
+
+	/* Walk past every byte that matches completely */
+	while(pos < limit && str_a[pos] == str_b[pos]) {
+		pos++;
+	}
+
+	if(pos == limit) {
+		/* No differing byte found within the compared range */
+		lua_pushinteger(L, pos*8);
+	} else {
+		/* The XOR has a 1 for every differing bit; its leading zeros are
+		 * the bits that still match inside the first differing byte */
+		lua_pushinteger(L, pos*8 + (clz(str_a[pos] ^ str_b[pos])));
+	}
+	return 1;
+}
+
 LUA_API int luaopen_prosody_util_strbitop(lua_State *L) {
 	luaL_Reg exports[] = {
 		{ "sand", strop_and },
 		{ "sor",  strop_or },
 		{ "sxor", strop_xor },
+		{ "common_prefix_bits", strop_common_prefix_bits },
 		{ NULL, NULL }
 	};