aboutsummaryrefslogtreecommitdiffstats
path: root/util
diff options
context:
space:
mode:
authorKim Alvefur <zash@zash.se>2021-12-10 22:48:45 +0100
committerKim Alvefur <zash@zash.se>2021-12-10 22:48:45 +0100
commitd4c145179420e6de0d2189059ea988072a7e7e45 (patch)
tree9e4186432ce5813a91c4774b0ce9b22d3367d8d4 /util
parentab4991e8297bf1055bee64d7eca9651b010046bf (diff)
downloadprosody-d4c145179420e6de0d2189059ea988072a7e7e45.tar.gz
prosody-d4c145179420e6de0d2189059ea988072a7e7e45.zip
util.format: Escape invalid UTF-8 by passing through serialization
Should prevent invalid UTF-8 from making it into the logs, which can cause trouble with terminals or log viewers or other tools, such as when grep determines that log files are binary.
Diffstat (limited to 'util')
-rw-r--r--util/format.lua11
1 files changed, 10 insertions, 1 deletions
diff --git a/util/format.lua b/util/format.lua
index 43097e6a..d452fd3d 100644
--- a/util/format.lua
+++ b/util/format.lua
@@ -5,6 +5,7 @@
local tostring = tostring;
local unpack = table.unpack or unpack; -- luacheck: ignore 113/unpack
local pack = require "util.table".pack; -- TODO table.pack in 5.2+
+local valid_utf8 = require "util.encodings".utf8.valid;
local type = type;
local dump = require "util.serialization".new("debug");
local num_type = math.type or function (n)
@@ -60,10 +61,18 @@ local function format(formatstring, ...)
args[i] = dump(arg);
spec = "%s";
elseif option == "s" then
- args[i] = tostring(arg):gsub("[%z\1-\8\11-\31\127]", control_symbols):gsub("\n\t?", "\n\t");
+ arg = tostring(arg);
+ if arg:find("[\128-\255]") and not valid_utf8(arg) then
+ args[i] = dump(arg);
+ else
+ args[i] = arg:gsub("[%z\1-\8\11-\31\127]", control_symbols):gsub("\n\t?", "\n\t");
+ end
elseif type(arg) ~= "number" then -- arg isn't number as expected?
args[i] = tostring(arg);
spec = "[%s]";
+ option = "s";
+ spec = "[%s]";
+ t = "string";
elseif expects_integer[option] and num_type(arg) ~= "integer" then
args[i] = tostring(arg);
spec = "[%s]";