util.format: Escape invalid UTF-8 by passing through serialization
Should prevent invalid UTF-8 from making it into the logs, which can
cause trouble with terminals or log viewers or other tools, such as when
grep determines that log files are binary.
--- a/spec/util_format_spec.lua Fri Dec 10 22:25:34 2021 +0100
+++ b/spec/util_format_spec.lua Fri Dec 10 22:48:45 2021 +0100
@@ -20,5 +20,9 @@
assert.equal("␁", format("%s", "\1"));
end);
+ it("escapes invalid UTF-8", function ()
+ assert.equal("\"Hello w\\195rld\"", format("%s", "Hello w\195rld"));
+ end);
+
end);
end);
--- a/util/format.lua Fri Dec 10 22:25:34 2021 +0100
+++ b/util/format.lua Fri Dec 10 22:48:45 2021 +0100
@@ -5,6 +5,7 @@
local tostring = tostring;
local unpack = table.unpack or unpack; -- luacheck: ignore 113/unpack
local pack = require "util.table".pack; -- TODO table.pack in 5.2+
+local valid_utf8 = require "util.encodings".utf8.valid;
local type = type;
local dump = require "util.serialization".new("debug");
local num_type = math.type or function (n)
@@ -60,10 +61,18 @@
args[i] = dump(arg);
spec = "%s";
elseif option == "s" then
- args[i] = tostring(arg):gsub("[%z\1-\8\11-\31\127]", control_symbols):gsub("\n\t?", "\n\t");
+ arg = tostring(arg);
+ if arg:find("[\128-\255]") and not valid_utf8(arg) then
+ args[i] = dump(arg);
+ else
+ args[i] = arg:gsub("[%z\1-\8\11-\31\127]", control_symbols):gsub("\n\t?", "\n\t");
+ end
elseif type(arg) ~= "number" then -- arg isn't number as expected?
args[i] = tostring(arg);
spec = "[%s]";
+ option = "s";
+ spec = "[%s]";
+ t = "string";
elseif expects_integer[option] and num_type(arg) ~= "integer" then
args[i] = tostring(arg);
spec = "[%s]";