diff options
Diffstat (limited to 'util')
-rw-r--r-- | util/openmetrics.lua | 308 | ||||
-rw-r--r-- | util/statistics.lua | 326 | ||||
-rw-r--r-- | util/statsd.lua | 297 |
3 files changed, 722 insertions, 209 deletions
diff --git a/util/openmetrics.lua b/util/openmetrics.lua new file mode 100644 index 00000000..299b36c7 --- /dev/null +++ b/util/openmetrics.lua @@ -0,0 +1,308 @@ +--[[ +This module implements a subset of the OpenMetrics Internet Draft version 00. + +URL: https://tools.ietf.org/html/draft-richih-opsawg-openmetrics-00 + +The following metric types are supported: + +- Counter +- Gauge +- Histogram +- Summary + +It is used by util.statsd and util.statistics to provite the OpenMetrics API. + +To understand what this module is about, it is useful to familiarize oneself +with the terms MetricFamily, Metric, LabelSet, Label and MetricPoint as +defined in the I-D linked above. +--]] +-- metric constructor interface: +-- metric_ctor(..., family_name, labels, extra) + +local time = require "util.time".now; +local select = select; +local array = require "util.array"; +local log = require "util.logger".init("util.openmetrics"); +local new_multitable = require "util.multitable".new; +local iter_multitable = require "util.multitable".iter; + +-- BEGIN of Utility: "metric proxy" +-- This allows to wrap a MetricFamily in a proxy which only provides the +-- `with_labels` and `with_partial_label` methods. This allows to pre-set one +-- or more labels on a metric family. This is used in particular via +-- `with_partial_label` by the moduleapi in order to pre-set the `host` label +-- on metrics created in non-global modules. +local metric_proxy_mt = {} +metric_proxy_mt.__index = metric_proxy_mt + +local function new_metric_proxy(metric_family, with_labels_proxy_fun) + return { + _family = metric_family, + with_labels = function(self, ...) + return with_labels_proxy_fun(self._family, ...) + end; + with_partial_label = function(self, label) + return new_metric_proxy(self._family, function(family, ...) + return family:with_labels(label, ...) + end) + end + } +end + +-- END of Utility: "metric proxy" + +local function render_histogram_le(v) + if v == 1/0 then + -- I-D-00: 4.1.2.2.1: + -- Exposers MUST produce output for positive infinity as +Inf. + return "+Inf" + end + + return string.format("%g", v) +end + +-- BEGIN of generic MetricFamily implementation + +local metric_family_mt = {} +metric_family_mt.__index = metric_family_mt + +local function histogram_metric_ctor(orig_ctor, buckets) + return function(family_name, labels, extra) + return orig_ctor(buckets, family_name, labels, extra) + end +end + +local function new_metric_family(backend, type_, family_name, unit, description, label_keys, extra) + local metric_ctor = assert(backend[type_], "statistics backend does not support "..type_.." metrics families") + local labels = label_keys or {} + local user_labels = #labels + if type_ == "histogram" then + local buckets = extra and extra.buckets + if not buckets then + error("no buckets given for histogram metric") + end + buckets = array(buckets) + buckets:push(1/0) -- must have +inf bucket + + metric_ctor = histogram_metric_ctor(metric_ctor, buckets) + end + + local data + if #labels == 0 then + data = metric_ctor(family_name, nil, extra) + else + data = new_multitable() + end + + local mf = { + family_name = family_name, + data = data, + type_ = type_, + unit = unit, + description = description, + user_labels = user_labels, + label_keys = labels, + extra = extra, + _metric_ctor = metric_ctor, + } + setmetatable(mf, metric_family_mt); + return mf +end + +function metric_family_mt:new_metric(labels) + return self._metric_ctor(self.family_name, labels, self.extra) +end + +function metric_family_mt:clear() + for _, metric in self:iter_metrics() do + metric:reset() + end +end + +function metric_family_mt:with_labels(...) + local count = select('#', ...) + if count ~= self.user_labels then + error("number of labels passed to with_labels does not match number of label keys") + end + if count == 0 then + return self.data + end + local metric = self.data:get(...) + if not metric then + local values = table.pack(...) + metric = self:new_metric(values) + values[values.n+1] = metric + self.data:set(table.unpack(values, 1, values.n+1)) + end + return metric +end + +function metric_family_mt:with_partial_label(label) + return new_metric_proxy(self, function (family, ...) + return family:with_labels(label, ...) + end) +end + +function metric_family_mt:iter_metrics() + if #self.label_keys == 0 then + local done = false + return function() + if done then + return nil + end + done = true + return {}, self.data + end + end + local searchkeys = {}; + local nlabels = #self.label_keys + for i=1,nlabels do + searchkeys[i] = nil; + end + local it, state = iter_multitable(self.data, table.unpack(searchkeys, 1, nlabels)) + return function(_s) + local label_values = table.pack(it(_s)) + if label_values.n == 0 then + return nil, nil + end + local metric = label_values[label_values.n] + label_values[label_values.n] = nil + label_values.n = label_values.n - 1 + return label_values, metric + end, state +end + +-- END of generic MetricFamily implementation + +-- BEGIN of MetricRegistry implementation + + +-- Helper to test whether two metrics are "equal". +local function equal_metric_family(mf1, mf2) + if mf1.type_ ~= mf2.type_ then + return false + end + if #mf1.label_keys ~= #mf2.label_keys then + return false + end + -- Ignoring unit here because in general it'll be part of the name anyway + -- So either the unit was moved into/out of the name (which is a valid) + -- thing to do on an upgrade or we would expect not to see any conflicts + -- anyway. + --[[ + if mf1.unit ~= mf2.unit then + return false + end + ]] + for i, key in ipairs(mf1.label_keys) do + if key ~= mf2.label_keys[i] then + return false + end + end + return true +end + +-- If the unit is not empty, add it to the full name as per the I-D spec. +local function compose_name(name, unit) + local full_name = name + if unit and unit ~= "" then + full_name = full_name .. "_" .. unit + end + -- TODO: prohibit certain suffixes used by metrics if where they may cause + -- conflicts + return full_name +end + +local metric_registry_mt = {} +metric_registry_mt.__index = metric_registry_mt + +local function new_metric_registry(backend) + local reg = { + families = {}, + backend = backend, + } + setmetatable(reg, metric_registry_mt) + return reg +end + +function metric_registry_mt:register_metric_family(name, metric_family) + local existing = self.families[name]; + if existing then + if not equal_metric_family(metric_family, existing) then + -- We could either be strict about this, or replace the + -- existing metric family with the new one. + -- Being strict is nice to avoid programming errors / + -- conflicts, but causes issues when a new version of a module + -- is loaded. + -- + -- We will thus assume that the new metric is the correct one; + -- That is probably OK because unless you're reaching down into + -- the util.openmetrics or core.statsmanager API, your metric + -- name is going to be scoped to `prosody_mod_$modulename` + -- anyway and the damage is thus controlled. + -- + -- To make debugging such issues easier, we still log. + log("debug", "replacing incompatible existing metric family %s", name) + -- Below is the code to be strict. + --error("conflicting declarations for metric family "..name) + else + return existing + end + end + self.families[name] = metric_family + return metric_family +end + +function metric_registry_mt:gauge(name, unit, description, labels, extra) + name = compose_name(name, unit) + local mf = new_metric_family(self.backend, "gauge", name, unit, description, labels, extra) + mf = self:register_metric_family(name, mf) + return mf +end + +function metric_registry_mt:counter(name, unit, description, labels, extra) + name = compose_name(name, unit) + local mf = new_metric_family(self.backend, "counter", name, unit, description, labels, extra) + mf = self:register_metric_family(name, mf) + return mf +end + +function metric_registry_mt:histogram(name, unit, description, labels, extra) + name = compose_name(name, unit) + local mf = new_metric_family(self.backend, "histogram", name, unit, description, labels, extra) + mf = self:register_metric_family(name, mf) + return mf +end + +function metric_registry_mt:summary(name, unit, description, labels, extra) + name = compose_name(name, unit) + local mf = new_metric_family(self.backend, "summary", name, unit, description, labels, extra) + mf = self:register_metric_family(name, mf) + return mf +end + +function metric_registry_mt:get_metric_families() + return self.families +end + +-- END of MetricRegistry implementation + +-- BEGIN of general helpers for implementing high-level APIs on top of OpenMetrics + +local function timed(metric) + local t0 = time() + local submitter = assert(metric.sample or metric.set, "metric type cannot be used with timed()") + return function() + local t1 = time() + submitter(metric, t1-t0) + end +end + +-- END of general helpers + +return { + new_metric_proxy = new_metric_proxy; + new_metric_registry = new_metric_registry; + render_histogram_le = render_histogram_le; + timed = timed; +} diff --git a/util/statistics.lua b/util/statistics.lua index db608217..a8401168 100644 --- a/util/statistics.lua +++ b/util/statistics.lua @@ -1,171 +1,191 @@ -local t_sort = table.sort -local m_floor = math.floor; local time = require "util.time".now; +local new_metric_registry = require "util.openmetrics".new_metric_registry; +local render_histogram_le = require "util.openmetrics".render_histogram_le; -local function nop_function() end +-- BEGIN of Metric implementations -local function percentile(arr, length, pc) - local n = pc/100 * (length + 1); - local k, d = m_floor(n), n%1; - if k == 0 then - return arr[1] or 0; - elseif k >= length then - return arr[length]; - end - return arr[k] + d*(arr[k+1] - arr[k]); +-- Gauges +local gauge_metric_mt = {} +gauge_metric_mt.__index = gauge_metric_mt + +local function new_gauge_metric() + local metric = { value = 0 } + setmetatable(metric, gauge_metric_mt) + return metric +end + +function gauge_metric_mt:set(value) + self.value = value +end + +function gauge_metric_mt:add(delta) + self.value = self.value + delta end -local function new_registry(config) - config = config or {}; - local duration_sample_interval = config.duration_sample_interval or 5; - local duration_max_samples = config.duration_max_stored_samples or 5000; +function gauge_metric_mt:reset() + self.value = 0 +end - local function get_distribution_stats(events, n_actual_events, since, new_time, units) - local n_stored_events = #events; - t_sort(events); - local sum = 0; - for i = 1, n_stored_events do - sum = sum + events[i]; +function gauge_metric_mt:iter_samples() + local done = false + return function(_s) + if done then + return nil, true end + done = true + return "", nil, _s.value + end, self +end - return { - samples = events; - sample_count = n_stored_events; - count = n_actual_events, - rate = n_actual_events/(new_time-since); - average = n_stored_events > 0 and sum/n_stored_events or 0, - min = events[1] or 0, - max = events[n_stored_events] or 0, - units = units, - }; - end +-- Counters +local counter_metric_mt = {} +counter_metric_mt.__index = counter_metric_mt + +local function new_counter_metric() + local metric = { + _created = time(), + value = 0, + } + setmetatable(metric, counter_metric_mt) + return metric +end +function counter_metric_mt:set(value) + self.value = value +end - local registry = {}; - local methods; - methods = { - amount = function (name, conf) - local v = conf and conf.initial or 0; - registry[name..":amount"] = function () - return "amount", v, conf; - end - return function (new_v) v = new_v; end - end; - counter = function (name, conf) - local v = conf and conf.initial or 0; - registry[name..":amount"] = function () - return "amount", v, conf; - end - return function (delta) - v = v + delta; - end; - end; - rate = function (name, conf) - local since, n, total = time(), 0, 0; - registry[name..":rate"] = function () - total = total + n; - local t = time(); - local stats = { - rate = n/(t-since); - count = n; - total = total; - units = conf and conf.units; - type = conf and conf.type; - }; - since, n = t, 0; - return "rate", stats.rate, stats; - end; - return function () - n = n + 1; - end; - end; - distribution = function (name, conf) - local units = conf and conf.units; - local type = conf and conf.type or "distribution"; - local events, last_event = {}, 0; - local n_actual_events = 0; - local since = time(); - - registry[name..":"..type] = function () - local new_time = time(); - local stats = get_distribution_stats(events, n_actual_events, since, new_time, units); - events, last_event = {}, 0; - n_actual_events = 0; - since = new_time; - return type, stats.average, stats; - end; - - return function (value) - n_actual_events = n_actual_events + 1; - if n_actual_events%duration_sample_interval == 1 then - last_event = (last_event%duration_max_samples) + 1; - events[last_event] = value; - end - end; - end; - sizes = function (name, conf) - conf = conf or { units = "bytes", type = "size" } - return methods.distribution(name, conf); - end; - times = function (name, conf) - local units = conf and conf.units or "seconds"; - local events, last_event = {}, 0; - local n_actual_events = 0; - local since = time(); - - registry[name..":duration"] = function () - local new_time = time(); - local stats = get_distribution_stats(events, n_actual_events, since, new_time, units); - events, last_event = {}, 0; - n_actual_events = 0; - since = new_time; - return "duration", stats.average, stats; - end; - - return function () - n_actual_events = n_actual_events + 1; - if n_actual_events%duration_sample_interval ~= 1 then - return nop_function; - end - - local start_time = time(); - return function () - local end_time = time(); - local duration = end_time - start_time; - last_event = (last_event%duration_max_samples) + 1; - events[last_event] = duration; - end - end; - end; - - get_stats = function () - return registry; - end; - }; - return methods; +function counter_metric_mt:add(value) + self.value = (self.value or 0) + value end -return { - new = new_registry; - get_histogram = function (duration, n_buckets) - n_buckets = n_buckets or 100; - local events, n_events = duration.samples, duration.sample_count; - if not (events and n_events) then - return nil, "not a valid distribution stat"; +function counter_metric_mt:iter_samples() + local step = 0 + return function(_s) + step = step + 1 + if step == 1 then + return "_created", nil, _s._created + elseif step == 2 then + return "_total", nil, _s.value + else + return nil, nil, true + end + end, self +end + +function counter_metric_mt:reset() + self.value = 0 +end + +-- Histograms +local histogram_metric_mt = {} +histogram_metric_mt.__index = histogram_metric_mt + +local function new_histogram_metric(buckets) + local metric = { + _created = time(), + _sum = 0, + _count = 0, + } + -- the order of buckets matters unfortunately, so we cannot directly use + -- the thresholds as table keys + for i, threshold in ipairs(buckets) do + metric[i] = { + threshold = threshold, + threshold_s = render_histogram_le(threshold), + count = 0 + } + end + setmetatable(metric, histogram_metric_mt) + return metric +end + +function histogram_metric_mt:sample(value) + -- According to the I-D, values must be part of all buckets + for i, bucket in pairs(self) do + if "number" == type(i) and bucket.threshold > value then + bucket.count = bucket.count + 1 end - local histogram = {}; + end + self._sum = self._sum + value + self._count = self._count + 1 +end - for i = 1, 100, 100/n_buckets do - histogram[i] = percentile(events, n_events, i); +function histogram_metric_mt:iter_samples() + local key = nil + return function (_s) + local data + key, data = next(_s, key) + if key == "_created" or key == "_sum" or key == "_count" then + return key, nil, data + elseif key ~= nil then + return "_bucket", {["le"] = data.threshold_s}, data.count + else + return nil, nil, nil end - return histogram; - end; + end, self +end - get_percentile = function (duration, pc) - local events, n_events = duration.samples, duration.sample_count; - if not (events and n_events) then - return nil, "not a valid distribution stat"; +function histogram_metric_mt:reset() + self._created = time() + self._count = 0 + self._sum = 0 + for i, bucket in pairs(self) do + if "number" == type(i) then + bucket.count = 0 end - return percentile(events, n_events, pc); - end; + end +end + +-- Summary +local summary_metric_mt = {} +summary_metric_mt.__index = summary_metric_mt + +local function new_summary_metric() + -- quantiles are not supported yet + local metric = { + _created = time(), + _sum = 0, + _count = 0, + } + setmetatable(metric, summary_metric_mt) + return metric +end + +function summary_metric_mt:sample(value) + self._sum = self._sum + value + self._count = self._count + 1 +end + +function summary_metric_mt:iter_samples() + local key = nil + return function (_s) + local data + key, data = next(_s, key) + return key, nil, data + end, self +end + +function summary_metric_mt:reset() + self._created = time() + self._count = 0 + self._sum = 0 +end + +local pull_backend = { + gauge = new_gauge_metric, + counter = new_counter_metric, + histogram = new_histogram_metric, + summary = new_summary_metric, +} + +-- END of Metric implementations + +local function new() + return { + metric_registry = new_metric_registry(pull_backend), + } +end + +return { + new = new; } diff --git a/util/statsd.lua b/util/statsd.lua index 8f6151c6..25e03e38 100644 --- a/util/statsd.lua +++ b/util/statsd.lua @@ -1,82 +1,267 @@ local socket = require "socket"; +local time = require "util.time".now; +local array = require "util.array"; +local t_concat = table.concat; -local time = require "util.time".now +local new_metric_registry = require "util.openmetrics".new_metric_registry; +local render_histogram_le = require "util.openmetrics".render_histogram_le; -local function new(config) - if not config or not config.statsd_server then - return nil, "No statsd server specified in the config, please see https://prosody.im/doc/statistics"; +-- BEGIN of Metric implementations + +-- Gauges +local gauge_metric_mt = {} +gauge_metric_mt.__index = gauge_metric_mt + +local function new_gauge_metric(full_name, impl) + local metric = { + _full_name = full_name; + _impl = impl; + value = 0; + } + setmetatable(metric, gauge_metric_mt) + return metric +end + +function gauge_metric_mt:set(value) + self.value = value + self._impl:push_gauge(self._full_name, value) +end + +function gauge_metric_mt:add(delta) + self.value = self.value + delta + self._impl:push_gauge(self._full_name, self.value) +end + +function gauge_metric_mt:reset() + self.value = 0 + self._impl:push_gauge(self._full_name, 0) +end + +function gauge_metric_mt.iter_samples() + -- statsd backend does not support iteration. + return function() + return nil end +end - local sock = socket.udp(); - sock:setpeername(config.statsd_server, config.statsd_port or 8125); +-- Counters +local counter_metric_mt = {} +counter_metric_mt.__index = counter_metric_mt - local prefix = (config.prefix or "prosody").."."; +local function new_counter_metric(full_name, impl) + local metric = { + _full_name = full_name, + _impl = impl, + value = 0, + } + setmetatable(metric, counter_metric_mt) + return metric +end + +function counter_metric_mt:set(value) + local delta = value - self.value + self.value = value + self._impl:push_counter_delta(self._full_name, delta) +end - local function send_metric(s) - return sock:send(prefix..s); +function counter_metric_mt:add(value) + self.value = (self.value or 0) + value + self._impl:push_counter_delta(self._full_name, value) +end + +function counter_metric_mt.iter_samples() + -- statsd backend does not support iteration. + return function() + return nil + end +end + +function counter_metric_mt:reset() + self.value = 0 +end + +-- Histograms +local histogram_metric_mt = {} +histogram_metric_mt.__index = histogram_metric_mt + +local function new_histogram_metric(buckets, full_name, impl) + -- NOTE: even though the more or less proprietrary dogstatsd has its own + -- histogram implementation, we push the individual buckets in this statsd + -- backend for both consistency and compatibility across statsd + -- implementations. + local metric = { + _sum_name = full_name..".sum", + _count_name = full_name..".count", + _impl = impl, + _created = time(), + _sum = 0, + _count = 0, + } + -- the order of buckets matters unfortunately, so we cannot directly use + -- the thresholds as table keys + for i, threshold in ipairs(buckets) do + local threshold_s = render_histogram_le(threshold) + metric[i] = { + threshold = threshold, + threshold_s = threshold_s, + count = 0, + _full_name = full_name..".bucket."..(threshold_s:gsub("%.", "_")), + } end + setmetatable(metric, histogram_metric_mt) + return metric +end - local function send_gauge(name, amount, relative) - local s_amount = tostring(amount); - if relative and amount > 0 then - s_amount = "+"..s_amount; +function histogram_metric_mt:sample(value) + -- According to the I-D, values must be part of all buckets + for i, bucket in pairs(self) do + if "number" == type(i) and bucket.threshold > value then + bucket.count = bucket.count + 1 + self._impl:push_counter_delta(bucket._full_name, 1) end - return send_metric(name..":"..s_amount.."|g"); end + self._sum = self._sum + value + self._count = self._count + 1 + self._impl:push_gauge(self._sum_name, self._sum) + self._impl:push_counter_delta(self._count_name, 1) +end - local function send_counter(name, amount) - return send_metric(name..":"..tostring(amount).."|c"); +function histogram_metric_mt.iter_samples() + -- statsd backend does not support iteration. + return function() + return nil end +end - local function send_duration(name, duration) - return send_metric(name..":"..tostring(duration).."|ms"); +function histogram_metric_mt:reset() + self._created = time() + self._count = 0 + self._sum = 0 + for i, bucket in pairs(self) do + if "number" == type(i) then + bucket.count = 0 + end end + self._impl:push_gauge(self._sum_name, self._sum) +end + +-- Summaries +local summary_metric_mt = {} +summary_metric_mt.__index = summary_metric_mt + +local function new_summary_metric(full_name, impl) + local metric = { + _sum_name = full_name..".sum", + _count_name = full_name..".count", + _impl = impl, + } + setmetatable(metric, summary_metric_mt) + return metric +end + +function summary_metric_mt:sample(value) + self._impl:push_counter_delta(self._sum_name, value) + self._impl:push_counter_delta(self._count_name, 1) +end - local function send_histogram_sample(name, sample) - return send_metric(name..":"..tostring(sample).."|h"); +function summary_metric_mt.iter_samples() + -- statsd backend does not support iteration. + return function() + return nil end +end - local methods; - methods = { - amount = function (name, conf) - if conf and conf.initial then - send_gauge(name, conf.initial); - end - return function (new_v) send_gauge(name, new_v); end - end; - counter = function (name, conf) --luacheck: ignore 212/conf - return function (delta) - send_gauge(name, delta, true); - end; - end; - rate = function (name) - return function () - send_counter(name, 1); - end; +function summary_metric_mt.reset() +end + +-- BEGIN of statsd client implementation + +local statsd_mt = {} +statsd_mt.__index = statsd_mt + +function statsd_mt:cork() + self.corked = true + self.cork_buffer = self.cork_buffer or {} +end + +function statsd_mt:uncork() + self.corked = false + self:_flush_cork_buffer() +end + +function statsd_mt:_flush_cork_buffer() + local buffer = self.cork_buffer + for metric_name, value in pairs(buffer) do + self:_send_gauge(metric_name, value) + buffer[metric_name] = nil + end +end + +function statsd_mt:push_gauge(metric_name, value) + if self.corked then + self.cork_buffer[metric_name] = value + else + self:_send_gauge(metric_name, value) + end +end + +function statsd_mt:_send_gauge(metric_name, value) + self:_send(self.prefix..metric_name..":"..tostring(value).."|g") +end + +function statsd_mt:push_counter_delta(metric_name, delta) + self:_send(self.prefix..metric_name..":"..tostring(delta).."|c") +end + +function statsd_mt:_send(s) + return self.sock:send(s) +end + +-- END of statsd client implementation + +local function build_metric_name(family_name, labels) + local parts = array { family_name } + if labels then + parts:append(labels) + end + return t_concat(parts, "/"):gsub("%.", "_"):gsub("/", ".") +end + +local function new(config) + if not config or not config.statsd_server then + return nil, "No statsd server specified in the config, please see https://prosody.im/doc/statistics"; + end + + local sock = socket.udp(); + sock:setpeername(config.statsd_server, config.statsd_port or 8125); + + local prefix = (config.prefix or "prosody").."."; + + local impl = { + metric_registry = nil; + sock = sock; + prefix = prefix; + }; + setmetatable(impl, statsd_mt) + + local backend = { + gauge = function(family_name, labels) + return new_gauge_metric(build_metric_name(family_name, labels), impl) end; - distribution = function (name, conf) --luacheck: ignore 212/conf - return function (value) - send_histogram_sample(name, value); - end; + counter = function(family_name, labels) + return new_counter_metric(build_metric_name(family_name, labels), impl) end; - sizes = function (name) - name = name.."_size"; - return function (value) - send_histogram_sample(name, value); - end; + histogram = function(buckets, family_name, labels) + return new_histogram_metric(buckets, build_metric_name(family_name, labels), impl) end; - times = function (name) - return function () - local start_time = time(); - return function () - local end_time = time(); - local duration = end_time - start_time; - send_duration(name, duration*1000); - end - end; + summary = function(family_name, labels, extra) + return new_summary_metric(build_metric_name(family_name, labels), impl, extra) end; }; - return methods; + + impl.metric_registry = new_metric_registry(backend); + + return impl; end return { |