From d1144a7ede35eb04e0c81c328e3684eb1f7f3f72 Mon Sep 17 00:00:00 2001 From: Kim Alvefur Date: Fri, 12 Nov 2021 14:21:15 +0100 Subject: util.human.io: Fix cutting of UTF-8 into pieces Down the rabbit hole we go... --- spec/util_human_io_spec.lua | 19 +++++++++++++++++++ util/human/io.lua | 28 +++++++++++++++++++++------- 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/spec/util_human_io_spec.lua b/spec/util_human_io_spec.lua index ead117df..f1b28883 100644 --- a/spec/util_human_io_spec.lua +++ b/spec/util_human_io_spec.lua @@ -23,6 +23,25 @@ describe("util.human.io", function () end); end); + + describe("ellipsis", function() + it("works", function() + assert.equal("…", human_io.ellipsis("abc", 1)); + assert.equal("a…", human_io.ellipsis("abc", 2)); + assert.equal("abc", human_io.ellipsis("abc", 3)); + + assert.equal("…", human_io.ellipsis("räksmörgås", 1)); + assert.equal("r…", human_io.ellipsis("räksmörgås", 2)); + assert.equal("rä…", human_io.ellipsis("räksmörgås", 3)); + assert.equal("räk…", human_io.ellipsis("räksmörgås", 4)); + assert.equal("räks…", human_io.ellipsis("räksmörgås", 5)); + assert.equal("räksm…", human_io.ellipsis("räksmörgås", 6)); + assert.equal("räksmö…", human_io.ellipsis("räksmörgås", 7)); + assert.equal("räksmör…", human_io.ellipsis("räksmörgås", 8)); + assert.equal("räksmörg…", human_io.ellipsis("räksmörgås", 9)); + assert.equal("räksmörgås", human_io.ellipsis("räksmörgås", 10)); + end); + end); end); diff --git a/util/human/io.lua b/util/human/io.lua index 09ed2807..83a521c3 100644 --- a/util/human/io.lua +++ b/util/human/io.lua @@ -1,5 +1,9 @@ local array = require "util.array"; -local utf8 = rawget(_G,"utf8") or require"util.encodings".utf8; +local utf8 = rawget(_G, "utf8") or require"util.encodings".utf8; +local len = utf8.len or function(s) + local _, count = s:gsub("[%z\001-\127\194-\253][\128-\191]*", ""); + return count; +end; local function getchar(n) local stty_ret = os.execute("stty raw -echo 2>/dev/null"); @@ -96,11 +100,21 @@ local function padleft(s, width) return string.rep(" ", width-#s)..s; end +local pat = "[%z\001-\127\194-\253][\128-\191]*"; +local function utf8_cut(s, pos) + return s:match("^"..pat:rep(pos)) or s; +end + +if utf8.len and utf8.offset then + function utf8_cut(s, pos) + return s:sub(1, utf8.offset(s, pos+1)-1); + end +end + local function ellipsis(s, width) - if #s <= width then return s; end - s = s:sub(1, width - 1) - while not utf8.len(s) do s = s:sub(1, -2); end - return s .. "…"; + if len(s) <= width then return s; end + if width == 1 then return "…"; end + return utf8_cut(s, width - 1) .. "…"; end local function new_table(col_specs, max_width) @@ -148,13 +162,13 @@ local function new_table(col_specs, max_width) else v = tostring(v); end - if #v < width then + if len(v) < width then if column.align == "right" then v = padleft(v, width); else v = padright(v, width); end - elseif #v > width then + elseif len(v) > width then v = ellipsis(v, width); end table.insert(output, v); -- cgit v1.2.3