# HG changeset patch # User Kim Alvefur # Date 1636723275 -3600 # Node ID 93e9f7ae2f9bd1201d4f97c28e87d65a01f04c0a # Parent d278a4c6da7f78114fc36fadbc4b27d37689c50e util.human.io: Fix cutting of UTF-8 into pieces Down the rabbit hole we go... diff -r d278a4c6da7f -r 93e9f7ae2f9b spec/util_human_io_spec.lua --- a/spec/util_human_io_spec.lua Fri Nov 12 12:19:01 2021 +0100 +++ b/spec/util_human_io_spec.lua Fri Nov 12 14:21:15 2021 +0100 @@ -23,6 +23,25 @@ end); end); + + describe("ellipsis", function() + it("works", function() + assert.equal("…", human_io.ellipsis("abc", 1)); + assert.equal("a…", human_io.ellipsis("abc", 2)); + assert.equal("abc", human_io.ellipsis("abc", 3)); + + assert.equal("…", human_io.ellipsis("räksmörgås", 1)); + assert.equal("r…", human_io.ellipsis("räksmörgås", 2)); + assert.equal("rä…", human_io.ellipsis("räksmörgås", 3)); + assert.equal("räk…", human_io.ellipsis("räksmörgås", 4)); + assert.equal("räks…", human_io.ellipsis("räksmörgås", 5)); + assert.equal("räksm…", human_io.ellipsis("räksmörgås", 6)); + assert.equal("räksmö…", human_io.ellipsis("räksmörgås", 7)); + assert.equal("räksmör…", human_io.ellipsis("räksmörgås", 8)); + assert.equal("räksmörg…", human_io.ellipsis("räksmörgås", 9)); + assert.equal("räksmörgås", human_io.ellipsis("räksmörgås", 10)); + end); + end); end); diff -r d278a4c6da7f -r 93e9f7ae2f9b util/human/io.lua --- a/util/human/io.lua Fri Nov 12 12:19:01 2021 +0100 +++ b/util/human/io.lua Fri Nov 12 14:21:15 2021 +0100 @@ -1,5 +1,9 @@ local array = require "util.array"; -local utf8 = rawget(_G,"utf8") or require"util.encodings".utf8; +local utf8 = rawget(_G, "utf8") or require"util.encodings".utf8; +local len = utf8.len or function(s) + local _, count = s:gsub("[%z\001-\127\194-\253][\128-\191]*", ""); + return count; +end; local function getchar(n) local stty_ret = os.execute("stty raw -echo 2>/dev/null"); @@ -96,11 +100,21 @@ return string.rep(" ", width-#s)..s; end +local pat = "[%z\001-\127\194-\253][\128-\191]*"; 
+local function utf8_cut(s, pos)
+	return s:match("^"..pat:rep(pos)) or s;
+end
+
+if utf8.len and utf8.offset then
+	function utf8_cut(s, pos)
+		return s:sub(1, (utf8.offset(s, pos+1) or #s+1)-1); -- offset is nil when s has fewer than pos+1 characters
+	end
+end
+
 local function ellipsis(s, width)
-	if #s <= width then return s; end
-	s = s:sub(1, width - 1)
-	while not utf8.len(s) do s = s:sub(1, -2); end
-	return s .. "…";
+	if (len(s) or #s) <= width then return s; end -- utf8.len yields a false value on invalid UTF-8; fall back to byte length
+	if width <= 1 then return "…"; end -- no room for text; also keeps a non-positive count out of utf8_cut
+	return utf8_cut(s, width - 1) .. "…";
 end
 
 local function new_table(col_specs, max_width)
@@ -148,13 +162,13 @@
 			else
 				v = tostring(v);
 			end
-			if #v < width then
+			if len(v) < width then
 				if column.align == "right" then
 					v = padleft(v, width);
 				else
 					v = padright(v, width);
 				end
-			elseif #v > width then
+			elseif len(v) > width then
 				v = ellipsis(v, width);
 			end
 			table.insert(output, v);