diff options
author | Kim Alvefur <zash@zash.se> | 2021-12-10 22:48:45 +0100 |
---|---|---|
committer | Kim Alvefur <zash@zash.se> | 2021-12-10 22:48:45 +0100 |
commit | d4c145179420e6de0d2189059ea988072a7e7e45 (patch) | |
tree | 9e4186432ce5813a91c4774b0ce9b22d3367d8d4 | |
parent | ab4991e8297bf1055bee64d7eca9651b010046bf (diff) | |
download | prosody-d4c145179420e6de0d2189059ea988072a7e7e45.tar.gz prosody-d4c145179420e6de0d2189059ea988072a7e7e45.zip |
util.format: Escape invalid UTF-8 by passing through serialization
Should prevent invalid UTF-8 from making it into the logs, which can
cause trouble with terminals or log viewers or other tools, such as when
grep determines that log files are binary.
-rw-r--r-- | spec/util_format_spec.lua | 4 | ||||
-rw-r--r-- | util/format.lua | 11 |
2 files changed, 14 insertions, 1 deletions
diff --git a/spec/util_format_spec.lua b/spec/util_format_spec.lua index d58c25aa..8d6ba8ad 100644 --- a/spec/util_format_spec.lua +++ b/spec/util_format_spec.lua @@ -20,5 +20,9 @@ describe("util.format", function() assert.equal("␁", format("%s", "\1")); end); + it("escapes invalid UTF-8", function () + assert.equal("\"Hello w\\195rld\"", format("%s", "Hello w\195rld")); + end); + end); end); diff --git a/util/format.lua b/util/format.lua index 43097e6a..d452fd3d 100644 --- a/util/format.lua +++ b/util/format.lua @@ -5,6 +5,7 @@ local tostring = tostring; local unpack = table.unpack or unpack; -- luacheck: ignore 113/unpack local pack = require "util.table".pack; -- TODO table.pack in 5.2+ +local valid_utf8 = require "util.encodings".utf8.valid; local type = type; local dump = require "util.serialization".new("debug"); local num_type = math.type or function (n) @@ -60,10 +61,18 @@ local function format(formatstring, ...) args[i] = dump(arg); spec = "%s"; elseif option == "s" then - args[i] = tostring(arg):gsub("[%z\1-\8\11-\31\127]", control_symbols):gsub("\n\t?", "\n\t"); + arg = tostring(arg); + if arg:find("[\128-\255]") and not valid_utf8(arg) then + args[i] = dump(arg); + else + args[i] = arg:gsub("[%z\1-\8\11-\31\127]", control_symbols):gsub("\n\t?", "\n\t"); + end elseif type(arg) ~= "number" then -- arg isn't number as expected? args[i] = tostring(arg); spec = "[%s]"; + option = "s"; + spec = "[%s]"; + t = "string"; elseif expects_integer[option] and num_type(arg) ~= "integer" then args[i] = tostring(arg); spec = "[%s]"; |