diff options
author | Kim Alvefur <zash@zash.se> | 2021-12-10 22:48:45 +0100 |
---|---|---|
committer | Kim Alvefur <zash@zash.se> | 2021-12-10 22:48:45 +0100 |
commit | d4c145179420e6de0d2189059ea988072a7e7e45 (patch) | |
tree | 9e4186432ce5813a91c4774b0ce9b22d3367d8d4 /util/format.lua | |
parent | ab4991e8297bf1055bee64d7eca9651b010046bf (diff) | |
download | prosody-d4c145179420e6de0d2189059ea988072a7e7e45.tar.gz prosody-d4c145179420e6de0d2189059ea988072a7e7e45.zip |
util.format: Escape invalid UTF-8 by passing through serialization
Should prevent invalid UTF-8 from making it into the logs, which can
cause trouble with terminals or log viewers or other tools, such as when
grep determines that log files are binary.
Diffstat (limited to 'util/format.lua')
-rw-r--r-- | util/format.lua | 11 |
1 files changed, 10 insertions, 1 deletions
diff --git a/util/format.lua b/util/format.lua index 43097e6a..d452fd3d 100644 --- a/util/format.lua +++ b/util/format.lua @@ -5,6 +5,7 @@ local tostring = tostring; local unpack = table.unpack or unpack; -- luacheck: ignore 113/unpack local pack = require "util.table".pack; -- TODO table.pack in 5.2+ +local valid_utf8 = require "util.encodings".utf8.valid; local type = type; local dump = require "util.serialization".new("debug"); local num_type = math.type or function (n) @@ -60,10 +61,18 @@ local function format(formatstring, ...) args[i] = dump(arg); spec = "%s"; elseif option == "s" then - args[i] = tostring(arg):gsub("[%z\1-\8\11-\31\127]", control_symbols):gsub("\n\t?", "\n\t"); + arg = tostring(arg); + if arg:find("[\128-\255]") and not valid_utf8(arg) then + args[i] = dump(arg); + else + args[i] = arg:gsub("[%z\1-\8\11-\31\127]", control_symbols):gsub("\n\t?", "\n\t"); + end elseif type(arg) ~= "number" then -- arg isn't number as expected? args[i] = tostring(arg); spec = "[%s]"; + option = "s"; + spec = "[%s]"; + t = "string"; elseif expects_integer[option] and num_type(arg) ~= "integer" then args[i] = tostring(arg); spec = "[%s]"; |