aboutsummaryrefslogtreecommitdiffstats
path: root/plugins
diff options
context:
space:
mode:
author Jonas Schäfer <jonas@wielicki.name> 2021-04-18 11:47:41 +0200
committer Jonas Schäfer <jonas@wielicki.name> 2021-04-18 11:47:41 +0200
commit 5d597837f5eed9856086375b944862e61c664ed9 (patch)
tree 66e29fc237d3cb7acb7fc32d6c512b5c0d15dabd /plugins
parent 75ca81a26e86df0fae6a39913d9ff2ff480e2211 (diff)
download prosody-5d597837f5eed9856086375b944862e61c664ed9.tar.gz
prosody-5d597837f5eed9856086375b944862e61c664ed9.zip
Statistics: Rewrite statistics backends to use OpenMetrics
The metric subsystem of Prosody has had some shortcomings from the perspective of the current state-of-the-art in metric observability.

The OpenMetrics standard [0] is a formalization of the data model (and serialization format) of the well-known and widely-used Prometheus [1] software stack.

The previous stats subsystem of Prosody did not map well to that format (see e.g. [2] and [3]); the key reason is that it was trying to do too much math on its own ([2]) while lacking first-class support for "families" of metrics ([3]) and structured metric metadata (despite the `extra` argument to metrics, there was no standard way of representing common things like "tags" or "labels").

Even though OpenMetrics has grown from the Prometheus world of monitoring, it maps well to other popular monitoring stacks such as:

- InfluxDB (labels can be mapped to tags and fields as necessary)
- Carbon/Graphite (labels can be attached to the metric name with dot-separation)
- StatsD (same mapping as Carbon/Graphite, assuming that Graphite is used as the backend, which is the default)

The util.statsd module has been ported to use the OpenMetrics model as a proof of concept. An implementation which exposes the util.statistics backend data as Prometheus metrics is ready for publishing in prosody-modules (most likely as mod_openmetrics_prometheus to avoid breaking existing 0.11 deployments).

At the same time, the previous measure()-based API had one major advantage: it is really simple and easy to use without requiring lots of knowledge about OpenMetrics or similar concepts. For that reason, as well as for compatibility with existing code, it is preserved and may even be extended in the future.
However, code relying on the `stats-updated` event as well as `get_stats` from `statsmanager` will break because the data model has changed completely; in the case of `stats-updated`, the code will simply not run (as the event was renamed in order to avoid conflicts); the `get_stats` function has been removed completely (so any attempt to call it will cause a traceback).

Note that the measure_*_event methods have been removed from the module API. I was unable to find any uses or documentation and thus deemed they should not be ported. Re-implementation is possible when necessary.

[0]: https://openmetrics.io/
[1]: https://prometheus.io/
[2]: #959
[3]: #960
Diffstat (limited to 'plugins')
-rw-r--r-- plugins/mod_admin_shell.lua 420
1 files changed, 185 insertions, 235 deletions
diff --git a/plugins/mod_admin_shell.lua b/plugins/mod_admin_shell.lua
index 7c2003e0..2e24bdf2 100644
--- a/plugins/mod_admin_shell.lua
+++ b/plugins/mod_admin_shell.lua
@@ -36,6 +36,9 @@ local serialization = require "util.serialization";
local serialize_config = serialization.new ({ fatal = false, unquoted = true});
local time = require "util.time";
+local t_insert = table.insert;
+local t_concat = table.concat;
+
local format_number = require "util.human.units".format;
local format_table = require "util.human.io".table;
@@ -1342,187 +1345,112 @@ local short_units = {
bytes = "B",
};
-local function format_stat(type, unit, value, ref_value)
- ref_value = ref_value or value;
- --do return tostring(value) end
- if not unit then
- if type == "duration" then
- unit = "seconds"
- elseif type == "size" then
- unit = "bytes";
- elseif type == "rate" then
- unit = " events/sec"
- if ref_value < 0.9 then
- unit = "events/min"
- value = value*60;
- if ref_value < 0.6/60 then
- unit = "events/h"
- value = value*60;
- end
+local stats_methods = {};
+
+function stats_methods:render_single_fancy_histogram_ex(print, prefix, metric_family, metric, cumulative)
+ local creation_timestamp, sum, count
+ local buckets = {}
+ local prev_bucket_count = 0
+ for suffix, extra_labels, value in metric:iter_samples() do
+ if suffix == "_created" then
+ creation_timestamp = value
+ elseif suffix == "_sum" then
+ sum = value
+ elseif suffix == "_count" then
+ count = value
+ else
+ local bucket_threshold = extra_labels["le"]
+ local bucket_count
+ if cumulative then
+ bucket_count = value
+ else
+ bucket_count = value - prev_bucket_count
+ prev_bucket_count = value
+ end
+ if bucket_threshold == "+Inf" then
+ t_insert(buckets, {threshold = 1/0, count = bucket_count})
+ elseif bucket_threshold ~= nil then
+ t_insert(buckets, {threshold = tonumber(bucket_threshold), count = bucket_count})
end
- return ("%.3g %s"):format(value, unit);
end
end
- return format_number(value, short_units[unit] or unit or "", unit == "bytes" and 'b' or nil);
-end
-local stats_methods = {};
-function stats_methods:bounds(_lower, _upper)
- for _, stat_info in ipairs(self) do
- local data = stat_info[4];
- if data then
- local lower = _lower or data.min;
- local upper = _upper or data.max;
- local new_data = {
- min = lower;
- max = upper;
- samples = {};
- sample_count = 0;
- count = data.count;
- units = data.units;
- };
- local sum = 0;
- for _, v in ipairs(data.samples) do
- if v > upper then
- break;
- elseif v>=lower then
- table.insert(new_data.samples, v);
- sum = sum + v;
- end
- end
- new_data.sample_count = #new_data.samples;
- stat_info[4] = new_data;
- stat_info[3] = sum/new_data.sample_count;
+ if #buckets == 0 or not creation_timestamp or not sum or not count then
+ print("[no data or not a histogram]")
+ return false
+ end
+
+ local graph_width, graph_height, wscale = #buckets, 10, 1;
+ if graph_width < 8 then
+ wscale = 8
+ elseif graph_width < 16 then
+ wscale = 4
+ elseif graph_width < 32 then
+ wscale = 2
+ end
+ local eighth_chars = " ▁▂▃▄▅▆▇█";
+
+ local max_bin_samples = 0
+ for _, bucket in ipairs(buckets) do
+ if bucket.count > max_bin_samples then
+ max_bin_samples = bucket.count
end
end
- return self;
-end
-function stats_methods:trim(lower, upper)
- upper = upper or (100-lower);
- local statistics = require "util.statistics";
- for _, stat_info in ipairs(self) do
- -- Strip outliers
- local data = stat_info[4];
- if data then
- local new_data = {
- min = statistics.get_percentile(data, lower);
- max = statistics.get_percentile(data, upper);
- samples = {};
- sample_count = 0;
- count = data.count;
- units = data.units;
- };
- local sum = 0;
- for _, v in ipairs(data.samples) do
- if v > new_data.max then
- break;
- elseif v>=new_data.min then
- table.insert(new_data.samples, v);
- sum = sum + v;
- end
+ print("");
+ print(prefix)
+ print(("_"):rep(graph_width*wscale).." "..max_bin_samples);
+ for row = graph_height, 1, -1 do
+ local row_chars = {};
+ local min_eighths, max_eighths = 8, 0;
+ for i = 1, #buckets do
+ local char_eighths = math.ceil(math.max(math.min((graph_height/(max_bin_samples/buckets[i].count))-(row-1), 1), 0)*8);
+ if char_eighths < min_eighths then
+ min_eighths = char_eighths;
+ end
+ if char_eighths > max_eighths then
+ max_eighths = char_eighths;
+ end
+ if char_eighths == 0 then
+ row_chars[i] = ("-"):rep(wscale);
+ else
+ local char = eighth_chars:sub(char_eighths*3+1, char_eighths*3+3);
+ row_chars[i] = char:rep(wscale);
end
- new_data.sample_count = #new_data.samples;
- stat_info[4] = new_data;
- stat_info[3] = sum/new_data.sample_count;
end
+ print(table.concat(row_chars).."|- "..string.format("%.8g", math.ceil((max_bin_samples/graph_height)*(row-0.5))));
end
- return self;
-end
-function stats_methods:max(upper)
- return self:bounds(nil, upper);
+ local legend_pat = string.format("%%%d.%dg", wscale-1, wscale-1)
+ local row = {}
+ for i = 1, #buckets do
+ local threshold = buckets[i].threshold
+ t_insert(row, legend_pat:format(threshold))
+ end
+ t_insert(row, " " .. metric_family.unit)
+ print(t_concat(row, "/"))
+
+ return true
end
-function stats_methods:min(lower)
- return self:bounds(lower, nil);
+function stats_methods:render_single_fancy_histogram(print, prefix, metric_family, metric)
+ return self:render_single_fancy_histogram_ex(print, prefix, metric_family, metric, false)
end
-function stats_methods:summary()
- local statistics = require "util.statistics";
- for _, stat_info in ipairs(self) do
- local type, value, data = stat_info[2], stat_info[3], stat_info[4];
- if data and data.samples then
- table.insert(stat_info.output, string.format("Count: %d (%d captured)",
- data.count,
- data.sample_count
- ));
- table.insert(stat_info.output, string.format("Min: %s Mean: %s Max: %s",
- format_stat(type, data.units, data.min),
- format_stat(type, data.units, value),
- format_stat(type, data.units, data.max)
- ));
- table.insert(stat_info.output, string.format("Q1: %s Median: %s Q3: %s",
- format_stat(type, data.units, statistics.get_percentile(data, 25)),
- format_stat(type, data.units, statistics.get_percentile(data, 50)),
- format_stat(type, data.units, statistics.get_percentile(data, 75))
- ));
- end
- end
- return self;
+function stats_methods:render_single_fancy_histogram_cf(print, prefix, metric_family, metric)
+ -- cf = cumulative frequency
+ return self:render_single_fancy_histogram_ex(print, prefix, metric_family, metric, true)
end
function stats_methods:cfgraph()
for _, stat_info in ipairs(self) do
- local name, type, value, data = unpack(stat_info, 1, 4); -- luacheck: ignore 211
+ local family_name, metric_family = unpack(stat_info, 1, 2)
local function print(s)
table.insert(stat_info.output, s);
end
- if data and data.sample_count and data.sample_count > 0 then
- local raw_histogram = require "util.statistics".get_histogram(data);
-
- local graph_width, graph_height = 50, 10;
- local eighth_chars = " ▁▂▃▄▅▆▇█";
-
- local range = data.max - data.min;
-
- if range > 0 then
- local x_scaling = #raw_histogram/graph_width;
- local histogram = {};
- for i = 1, graph_width do
- histogram[i] = math.max(raw_histogram[i*x_scaling-1] or 0, raw_histogram[i*x_scaling] or 0);
- end
-
- print("");
- print(("_"):rep(52)..format_stat(type, data.units, data.max));
- for row = graph_height, 1, -1 do
- local row_chars = {};
- local min_eighths, max_eighths = 8, 0;
- for i = 1, #histogram do
- local char_eighths = math.ceil(math.max(math.min((graph_height/(data.max/histogram[i]))-(row-1), 1), 0)*8);
- if char_eighths < min_eighths then
- min_eighths = char_eighths;
- end
- if char_eighths > max_eighths then
- max_eighths = char_eighths;
- end
- if char_eighths == 0 then
- row_chars[i] = "-";
- else
- local char = eighth_chars:sub(char_eighths*3+1, char_eighths*3+3);
- row_chars[i] = char;
- end
- end
- print(table.concat(row_chars).."|-"..format_stat(type, data.units, data.max/(graph_height/(row-0.5))));
- end
- print(("\\ "):rep(11));
- local x_labels = {};
- for i = 1, 11 do
- local s = ("%-4s"):format((i-1)*10);
- if #s > 4 then
- s = s:sub(1, 3).."…";
- end
- x_labels[i] = s;
- end
- print(" "..table.concat(x_labels, " "));
- local units = "%";
- local margin = math.floor((graph_width-#units)/2);
- print((" "):rep(margin)..units);
- else
- print("[range too small to graph]");
- end
- print("");
+ if not self:render_family(print, family_name, metric_family, self.render_single_fancy_histogram_cf) then
+ return self
end
end
return self;
@@ -1530,81 +1458,90 @@ end
function stats_methods:histogram()
for _, stat_info in ipairs(self) do
- local name, type, value, data = unpack(stat_info, 1, 4); -- luacheck: ignore 211
+ local family_name, metric_family = unpack(stat_info, 1, 2)
local function print(s)
table.insert(stat_info.output, s);
end
- if not data then
- print("[no data]");
- return self;
- elseif not data.sample_count then
- print("[not a sampled metric type]");
- return self;
+ if not self:render_family(print, family_name, metric_family, self.render_single_fancy_histogram) then
+ return self
end
+ end
+ return self;
+end
- local graph_width, graph_height = 50, 10;
- local eighth_chars = " ▁▂▃▄▅▆▇█";
-
- local range = data.max - data.min;
+function stats_methods:render_single_counter(print, prefix, metric_family, metric)
+ local created_timestamp, current_value
+ for suffix, _, value in metric:iter_samples() do
+ if suffix == "_created" then
+ created_timestamp = value
+ elseif suffix == "_total" then
+ current_value = value
+ end
+ end
+ if current_value and created_timestamp then
+ local base_unit = short_units[metric_family.unit] or metric_family.unit
+ local unit = base_unit .. "/s"
+ local factor = 1
+ if base_unit == "s" then
+ -- be smart!
+ unit = "%"
+ factor = 100
+ elseif base_unit == "" then
+ unit = "events/s"
+ end
+ print(("%-50s %s"):format(prefix, format_number(factor * current_value / (self.now - created_timestamp), unit.." [avg]")));
+ end
+end
- if range > 0 then
- local n_buckets = graph_width;
+function stats_methods:render_single_gauge(print, prefix, metric_family, metric)
+ local current_value
+ for _, _, value in metric:iter_samples() do
+ current_value = value
+ end
+ if current_value then
+ local unit = short_units[metric_family.unit] or metric_family.unit
+ print(("%-50s %s"):format(prefix, format_number(current_value, unit)));
+ end
+end
- local histogram = {};
- for i = 1, n_buckets do
- histogram[i] = 0;
- end
- local max_bin_samples = 0;
- for _, d in ipairs(data.samples) do
- local bucket = math.floor(1+(n_buckets-1)/(range/(d-data.min)));
- histogram[bucket] = histogram[bucket] + 1;
- if histogram[bucket] > max_bin_samples then
- max_bin_samples = histogram[bucket];
- end
- end
+function stats_methods:render_single_summary(print, prefix, metric_family, metric)
+ local sum, count
+ for suffix, _, value in metric:iter_samples() do
+ if suffix == "_sum" then
+ sum = value
+ elseif suffix == "_count" then
+ count = value
+ end
+ end
+ if sum and count then
+ local unit = short_units[metric_family.unit] or metric_family.unit
+ if count == 0 then
+ print(("%-50s %s"):format(prefix, "no obs."));
+ else
+ print(("%-50s %s"):format(prefix, format_number(sum / count, unit.."/event [avg]")));
+ end
+ end
+end
- print("");
- print(("_"):rep(52)..max_bin_samples);
- for row = graph_height, 1, -1 do
- local row_chars = {};
- local min_eighths, max_eighths = 8, 0;
- for i = 1, #histogram do
- local char_eighths = math.ceil(math.max(math.min((graph_height/(max_bin_samples/histogram[i]))-(row-1), 1), 0)*8);
- if char_eighths < min_eighths then
- min_eighths = char_eighths;
- end
- if char_eighths > max_eighths then
- max_eighths = char_eighths;
- end
- if char_eighths == 0 then
- row_chars[i] = "-";
- else
- local char = eighth_chars:sub(char_eighths*3+1, char_eighths*3+3);
- row_chars[i] = char;
- end
- end
- print(table.concat(row_chars).."|-"..math.ceil((max_bin_samples/graph_height)*(row-0.5)));
- end
- print(("\\ "):rep(11));
- local x_labels = {};
- for i = 1, 11 do
- local s = ("%-4s"):format(format_stat(type, data.units, data.min+range*i/11, data.min):match("^%S+"));
- if #s > 4 then
- s = s:sub(1, 3).."…";
- end
- x_labels[i] = s;
+function stats_methods:render_family(print, family_name, metric_family, render_func)
+ local labelkeys = metric_family.label_keys
+ if #labelkeys > 0 then
+ print(family_name)
+ for labelset, metric in metric_family:iter_metrics() do
+ local labels = {}
+ for i, k in ipairs(labelkeys) do
+ local v = labelset[i]
+ t_insert(labels, ("%s=%s"):format(k, v))
end
- print(" "..table.concat(x_labels, " "));
- local units = format_stat(type, data.units, data.min):match("%s+(.+)$") or data.units or "";
- local margin = math.floor((graph_width-#units)/2);
- print((" "):rep(margin)..units);
- else
- print("[range too small to graph]");
+ local prefix = " "..t_concat(labels, " ")
+ render_func(self, print, prefix, metric_family, metric)
+ end
+ else
+ for _, metric in metric_family:iter_metrics() do
+ render_func(self, print, family_name, metric_family, metric)
end
- print("");
end
- return self;
end
local function stats_tostring(stats)
@@ -1618,7 +1555,14 @@ local function stats_tostring(stats)
end
print("");
else
- print(("%-50s %s"):format(stat_info[1], format_stat(stat_info[2], (stat_info[4] or {}).units, stat_info[3])));
+ local metric_family = stat_info[2]
+ if metric_family.type_ == "counter" then
+ stats:render_family(print, stat_info[1], metric_family, stats.render_single_counter)
+ elseif metric_family.type_ == "gauge" or metric_family.type_ == "unknown" then
+ stats:render_family(print, stat_info[1], metric_family, stats.render_single_gauge)
+ elseif metric_family.type_ == "summary" or metric_family.type_ == "histogram" then
+ stats:render_family(print, stat_info[1], metric_family, stats.render_single_summary)
+ end
end
end
return #stats.." statistics displayed";
@@ -1626,23 +1570,29 @@ end
local stats_mt = {__index = stats_methods, __tostring = stats_tostring }
local function new_stats_context(self)
- return setmetatable({ session = self.session, stats = true }, stats_mt);
+ -- TODO: instead of now(), it might be better to take the time of the last
+ -- interval, if the statistics backend is set to use periodic collection
+ -- Otherwise we get strange stuff like average cpu usage decreasing until
+ -- the next sample and so on.
+ return setmetatable({ session = self.session, stats = true, now = time.now() }, stats_mt);
end
-function def_env.stats:show(filter)
- -- luacheck: ignore 211/changed
- local stats, changed, extra = require "core.statsmanager".get_stats();
- local available, displayed = 0, 0;
+function def_env.stats:show(name_filter)
+ local statsman = require "core.statsmanager"
+ local collect = statsman.collect
+ if collect then
+ -- force collection if in manual mode
+ collect()
+ end
+ local metric_registry = statsman.get_metric_registry();
local displayed_stats = new_stats_context(self);
- for name, value in iterators.sorted_pairs(stats) do
- available = available + 1;
- if not filter or name:match(filter) then
- displayed = displayed + 1;
- local type = name:match(":(%a+)$");
+ for family_name, metric_family in iterators.sorted_pairs(metric_registry:get_metric_families()) do
+ if not name_filter or family_name:match(name_filter) then
table.insert(displayed_stats, {
- name, type, value, extra[name];
- output = {};
- });
+ family_name,
+ metric_family,
+ output = {}
+ })
end
end
return displayed_stats;