util/format.lua
author Kim Alvefur <zash@zash.se>
Fri, 10 Dec 2021 22:48:45 +0100
changeset 12035 87bc26f23d9b
parent 11652 96d3cbeb9275
child 12036 3db09eb4c43b
permissions -rw-r--r--
util.format: Escape invalid UTF-8 by passing trough serialization Should prevent invalid UTF-8 from making it into the logs, which can cause trouble with terminals or log viewers or other tools, such as when grep determines that log files are binary.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
8228
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
     1
--
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
     2
-- A string.format wrapper that gracefully handles invalid arguments
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
     3
--
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
     4
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
     5
local tostring = tostring;
8420
e88db5668cfb util.format: Import unpack from table lib in Lua 5.2+
Kim Alvefur <zash@zash.se>
parents: 8386
diff changeset
     6
local unpack = table.unpack or unpack; -- luacheck: ignore 113/unpack
9691
8c92ef4270c9 util.format: Use pack from util.table
Kim Alvefur <zash@zash.se>
parents: 9660
diff changeset
     7
local pack = require "util.table".pack; -- TODO table.pack in 5.2+
12035
87bc26f23d9b util.format: Escape invalid UTF-8 by passing trough serialization
Kim Alvefur <zash@zash.se>
parents: 11652
diff changeset
     8
local valid_utf8 = require "util.encodings".utf8.valid;
8228
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
     9
local type = type;
9697
6ed0d6224d64 util.format: Serialize values for the %q format
Kim Alvefur <zash@zash.se>
parents: 9691
diff changeset
    10
local dump = require "util.serialization".new("debug");
10039
386f085820e6 util.format: Handle integer formats the same way on Lua versions without integer support
Kim Alvefur <zash@zash.se>
parents: 10038
diff changeset
    11
local num_type = math.type or function (n)
386f085820e6 util.format: Handle integer formats the same way on Lua versions without integer support
Kim Alvefur <zash@zash.se>
parents: 10038
diff changeset
    12
	return n % 1 == 0 and n <= 9007199254740992 and n >= -9007199254740992 and "integer" or "float";
386f085820e6 util.format: Handle integer formats the same way on Lua versions without integer support
Kim Alvefur <zash@zash.se>
parents: 10038
diff changeset
    13
end
10038
4fca92d60040 util.format: Handle formats expecting an integer in Lua 5.3+ (fixes #1371)
Kim Alvefur <zash@zash.se>
parents: 9697
diff changeset
    14
10039
386f085820e6 util.format: Handle integer formats the same way on Lua versions without integer support
Kim Alvefur <zash@zash.se>
parents: 10038
diff changeset
    15
-- In Lua 5.3+ these formats throw an error if given a float
386f085820e6 util.format: Handle integer formats the same way on Lua versions without integer support
Kim Alvefur <zash@zash.se>
parents: 10038
diff changeset
    16
local expects_integer = { c = true, d = true, i = true, o = true, u = true, X = true, x = true, };
11642
5f4a657136bc util.format: Escape ASCII control characters in output
Kim Alvefur <zash@zash.se>
parents: 10039
diff changeset
    17
-- Printable Unicode replacements for control characters
5f4a657136bc util.format: Escape ASCII control characters in output
Kim Alvefur <zash@zash.se>
parents: 10039
diff changeset
    18
local control_symbols = {
5f4a657136bc util.format: Escape ASCII control characters in output
Kim Alvefur <zash@zash.se>
parents: 10039
diff changeset
    19
	-- 0x00 .. 0x1F --> U+2400 .. U+241F, 0x7F --> U+2421
5f4a657136bc util.format: Escape ASCII control characters in output
Kim Alvefur <zash@zash.se>
parents: 10039
diff changeset
    20
	["\000"] = "\226\144\128", ["\001"] = "\226\144\129", ["\002"] = "\226\144\130",
5f4a657136bc util.format: Escape ASCII control characters in output
Kim Alvefur <zash@zash.se>
parents: 10039
diff changeset
    21
	["\003"] = "\226\144\131", ["\004"] = "\226\144\132", ["\005"] = "\226\144\133",
5f4a657136bc util.format: Escape ASCII control characters in output
Kim Alvefur <zash@zash.se>
parents: 10039
diff changeset
    22
	["\006"] = "\226\144\134", ["\007"] = "\226\144\135", ["\008"] = "\226\144\136",
5f4a657136bc util.format: Escape ASCII control characters in output
Kim Alvefur <zash@zash.se>
parents: 10039
diff changeset
    23
	["\009"] = "\226\144\137", ["\010"] = "\226\144\138", ["\011"] = "\226\144\139",
5f4a657136bc util.format: Escape ASCII control characters in output
Kim Alvefur <zash@zash.se>
parents: 10039
diff changeset
    24
	["\012"] = "\226\144\140", ["\013"] = "\226\144\141", ["\014"] = "\226\144\142",
5f4a657136bc util.format: Escape ASCII control characters in output
Kim Alvefur <zash@zash.se>
parents: 10039
diff changeset
    25
	["\015"] = "\226\144\143", ["\016"] = "\226\144\144", ["\017"] = "\226\144\145",
5f4a657136bc util.format: Escape ASCII control characters in output
Kim Alvefur <zash@zash.se>
parents: 10039
diff changeset
    26
	["\018"] = "\226\144\146", ["\019"] = "\226\144\147", ["\020"] = "\226\144\148",
5f4a657136bc util.format: Escape ASCII control characters in output
Kim Alvefur <zash@zash.se>
parents: 10039
diff changeset
    27
	["\021"] = "\226\144\149", ["\022"] = "\226\144\150", ["\023"] = "\226\144\151",
5f4a657136bc util.format: Escape ASCII control characters in output
Kim Alvefur <zash@zash.se>
parents: 10039
diff changeset
    28
	["\024"] = "\226\144\152", ["\025"] = "\226\144\153", ["\026"] = "\226\144\154",
5f4a657136bc util.format: Escape ASCII control characters in output
Kim Alvefur <zash@zash.se>
parents: 10039
diff changeset
    29
	["\027"] = "\226\144\155", ["\028"] = "\226\144\156", ["\029"] = "\226\144\157",
5f4a657136bc util.format: Escape ASCII control characters in output
Kim Alvefur <zash@zash.se>
parents: 10039
diff changeset
    30
	["\030"] = "\226\144\158", ["\031"] = "\226\144\159", ["\127"] = "\226\144\161",
5f4a657136bc util.format: Escape ASCII control characters in output
Kim Alvefur <zash@zash.se>
parents: 10039
diff changeset
    31
};
8228
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    32
8385
e5d00bf4a4d5 util: Various minor changes to please [luacheck]
Kim Alvefur <zash@zash.se>
parents: 8228
diff changeset
    33
local function format(formatstring, ...)
9691
8c92ef4270c9 util.format: Use pack from util.table
Kim Alvefur <zash@zash.se>
parents: 9660
diff changeset
    34
	local args = pack(...);
8c92ef4270c9 util.format: Use pack from util.table
Kim Alvefur <zash@zash.se>
parents: 9660
diff changeset
    35
	local args_length = args.n;
8228
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    36
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    37
	-- format specifier spec:
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    38
	-- 1. Start: '%%'
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    39
	-- 2. Flags: '[%-%+ #0]'
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    40
	-- 3. Width: '%d?%d?'
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    41
	-- 4. Precision: '%.?%d?%d?'
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    42
	-- 5. Option: '[cdiouxXaAeEfgGqs%%]'
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    43
	--
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    44
	-- The options c, d, E, e, f, g, G, i, o, u, X, and x all expect a number as argument, whereas q and s expect a string.
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    45
	-- This function does not accept string values containing embedded zeros, except as arguments to the q option.
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    46
	-- a and A are only in Lua 5.2+
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    47
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    48
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    49
	-- process each format specifier
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    50
	local i = 0;
8385
e5d00bf4a4d5 util: Various minor changes to please [luacheck]
Kim Alvefur <zash@zash.se>
parents: 8228
diff changeset
    51
	formatstring = formatstring:gsub("%%[^cdiouxXaAeEfgGqs%%]*[cdiouxXaAeEfgGqs%%]", function(spec)
8228
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    52
		if spec ~= "%%" then
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    53
			i = i + 1;
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    54
			local arg = args[i];
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    55
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    56
			local option = spec:sub(-1);
9660
3da6cc927ee6 util.format: Tweak how nil values are handled
Kim Alvefur <zash@zash.se>
parents: 8420
diff changeset
    57
			if arg == nil then
3da6cc927ee6 util.format: Tweak how nil values are handled
Kim Alvefur <zash@zash.se>
parents: 8420
diff changeset
    58
				args[i] = "nil";
11648
fc1b8fe94d04 util.format: Change formatting of nil values to avoid looking like XML
Kim Alvefur <zash@zash.se>
parents: 11642
diff changeset
    59
				spec = "(%s)";
9697
6ed0d6224d64 util.format: Serialize values for the %q format
Kim Alvefur <zash@zash.se>
parents: 9691
diff changeset
    60
			elseif option == "q" then
6ed0d6224d64 util.format: Serialize values for the %q format
Kim Alvefur <zash@zash.se>
parents: 9691
diff changeset
    61
				args[i] = dump(arg);
6ed0d6224d64 util.format: Serialize values for the %q format
Kim Alvefur <zash@zash.se>
parents: 9691
diff changeset
    62
				spec = "%s";
6ed0d6224d64 util.format: Serialize values for the %q format
Kim Alvefur <zash@zash.se>
parents: 9691
diff changeset
    63
			elseif option == "s" then
12035
87bc26f23d9b util.format: Escape invalid UTF-8 by passing trough serialization
Kim Alvefur <zash@zash.se>
parents: 11652
diff changeset
    64
				arg = tostring(arg);
87bc26f23d9b util.format: Escape invalid UTF-8 by passing trough serialization
Kim Alvefur <zash@zash.se>
parents: 11652
diff changeset
    65
				if arg:find("[\128-\255]") and not valid_utf8(arg) then
87bc26f23d9b util.format: Escape invalid UTF-8 by passing trough serialization
Kim Alvefur <zash@zash.se>
parents: 11652
diff changeset
    66
					args[i] = dump(arg);
87bc26f23d9b util.format: Escape invalid UTF-8 by passing trough serialization
Kim Alvefur <zash@zash.se>
parents: 11652
diff changeset
    67
				else
87bc26f23d9b util.format: Escape invalid UTF-8 by passing trough serialization
Kim Alvefur <zash@zash.se>
parents: 11652
diff changeset
    68
					args[i] = arg:gsub("[%z\1-\8\11-\31\127]", control_symbols):gsub("\n\t?", "\n\t");
87bc26f23d9b util.format: Escape invalid UTF-8 by passing trough serialization
Kim Alvefur <zash@zash.se>
parents: 11652
diff changeset
    69
				end
8228
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    70
			elseif type(arg) ~= "number" then -- arg isn't number as expected?
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    71
				args[i] = tostring(arg);
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    72
				spec = "[%s]";
12035
87bc26f23d9b util.format: Escape invalid UTF-8 by passing trough serialization
Kim Alvefur <zash@zash.se>
parents: 11652
diff changeset
    73
				option = "s";
87bc26f23d9b util.format: Escape invalid UTF-8 by passing trough serialization
Kim Alvefur <zash@zash.se>
parents: 11652
diff changeset
    74
				spec = "[%s]";
87bc26f23d9b util.format: Escape invalid UTF-8 by passing trough serialization
Kim Alvefur <zash@zash.se>
parents: 11652
diff changeset
    75
				t = "string";
10038
4fca92d60040 util.format: Handle formats expecting an integer in Lua 5.3+ (fixes #1371)
Kim Alvefur <zash@zash.se>
parents: 9697
diff changeset
    76
			elseif expects_integer[option] and num_type(arg) ~= "integer" then
4fca92d60040 util.format: Handle formats expecting an integer in Lua 5.3+ (fixes #1371)
Kim Alvefur <zash@zash.se>
parents: 9697
diff changeset
    77
				args[i] = tostring(arg);
4fca92d60040 util.format: Handle formats expecting an integer in Lua 5.3+ (fixes #1371)
Kim Alvefur <zash@zash.se>
parents: 9697
diff changeset
    78
				spec = "[%s]";
8228
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    79
			end
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    80
		end
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    81
		return spec;
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    82
	end);
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    83
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    84
	-- process extra args
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    85
	while i < args_length do
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    86
		i = i + 1;
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    87
		local arg = args[i];
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    88
		if arg == nil then
11648
fc1b8fe94d04 util.format: Change formatting of nil values to avoid looking like XML
Kim Alvefur <zash@zash.se>
parents: 11642
diff changeset
    89
			args[i] = "(nil)";
8228
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    90
		else
11652
96d3cbeb9275 util.format: Escape ASCII control characters also in extra arguments
Kim Alvefur <zash@zash.se>
parents: 11651
diff changeset
    91
			args[i] = tostring(arg):gsub("[%z\1-\8\11-\31\127]", control_symbols):gsub("\n\t?", "\n\t");
8228
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    92
		end
8385
e5d00bf4a4d5 util: Various minor changes to please [luacheck]
Kim Alvefur <zash@zash.se>
parents: 8228
diff changeset
    93
		formatstring = formatstring .. " [%s]"
8228
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    94
	end
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    95
8385
e5d00bf4a4d5 util: Various minor changes to please [luacheck]
Kim Alvefur <zash@zash.se>
parents: 8228
diff changeset
    96
	return formatstring:format(unpack(args));
8228
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    97
end
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    98
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    99
return {
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
   100
	format = format;
70cb72f46a3b util.format: A string.format wrapper that gracefully handles invalid arguments
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
   101
};