util.human.io: Fix cutting of UTF-8 into pieces
author Kim Alvefur <zash@zash.se>
Fri, 12 Nov 2021 14:21:15 +0100
changeset 11900 93e9f7ae2f9b
parent 11899 d278a4c6da7f
child 11901 e84ea5b58b29
util.human.io: Fix cutting of UTF-8 into pieces

Down the rabbit hole we go...
spec/util_human_io_spec.lua
util/human/io.lua
--- a/spec/util_human_io_spec.lua	Fri Nov 12 12:19:01 2021 +0100
+++ b/spec/util_human_io_spec.lua	Fri Nov 12 14:21:15 2021 +0100
@@ -23,6 +23,25 @@
 
 		end);
 	end);
+
+	describe("ellipsis", function()
+		it("works", function()
+			assert.equal("…", human_io.ellipsis("abc", 1));
+			assert.equal("a…", human_io.ellipsis("abc", 2));
+			assert.equal("abc", human_io.ellipsis("abc", 3));
+
+			assert.equal("…", human_io.ellipsis("räksmörgås", 1));
+			assert.equal("r…", human_io.ellipsis("räksmörgås", 2));
+			assert.equal("rä…", human_io.ellipsis("räksmörgås", 3));
+			assert.equal("räk…", human_io.ellipsis("räksmörgås", 4));
+			assert.equal("räks…", human_io.ellipsis("räksmörgås", 5));
+			assert.equal("räksm…", human_io.ellipsis("räksmörgås", 6));
+			assert.equal("räksmö…", human_io.ellipsis("räksmörgås", 7));
+			assert.equal("räksmör…", human_io.ellipsis("räksmörgås", 8));
+			assert.equal("räksmörg…", human_io.ellipsis("räksmörgås", 9));
+			assert.equal("räksmörgås", human_io.ellipsis("räksmörgås", 10));
+		end);
+	end);
 end);
 
 
--- a/util/human/io.lua	Fri Nov 12 12:19:01 2021 +0100
+++ b/util/human/io.lua	Fri Nov 12 14:21:15 2021 +0100
@@ -1,5 +1,9 @@
 local array = require "util.array";
-local utf8 = rawget(_G,"utf8") or require"util.encodings".utf8;
+local utf8 = rawget(_G, "utf8") or require"util.encodings".utf8;
+local len = utf8.len or function(s)
+	local _, count = s:gsub("[%z\001-\127\194-\253][\128-\191]*", "");
+	return count;
+end;
 
 local function getchar(n)
 	local stty_ret = os.execute("stty raw -echo 2>/dev/null");
@@ -96,11 +100,21 @@
 	return string.rep(" ", width-#s)..s;
 end
 
+local pat = "[%z\001-\127\194-\253][\128-\191]*";
+local function utf8_cut(s, pos)
+	return s:match("^"..pat:rep(pos)) or s;
+end
+
+if utf8.len and utf8.offset then
+	function utf8_cut(s, pos)
+		return s:sub(1, utf8.offset(s, pos+1)-1);
+	end
+end
+
 local function ellipsis(s, width)
-	if #s <= width then return s; end
-	s = s:sub(1, width - 1)
-	while not utf8.len(s) do s = s:sub(1, -2); end
-	return s .. "…";
+	if len(s) <= width then return s; end
+	if width == 1 then return "…"; end
+	return utf8_cut(s, width - 1) .. "…";
 end
 
 local function new_table(col_specs, max_width)
@@ -148,13 +162,13 @@
 			else
 				v = tostring(v);
 			end
-			if #v < width then
+			if len(v) < width then
 				if column.align == "right" then
 					v = padleft(v, width);
 				else
 					v = padright(v, width);
 				end
-			elseif #v > width then
+			elseif len(v) > width then
 				v = ellipsis(v, width);
 			end
 			table.insert(output, v);