-- path: root/util/statistics.lua
-- blob: 3995465221922a6abe94f28898618d03a861cfe0
local t_sort = table.sort
local m_floor = math.floor;
local time = require "util.time".now;

-- Shared do-nothing callback: ignores any arguments and returns no values.
local function nop_function()
end

--- Estimate a percentile of a sample array using linear interpolation.
--
-- The (1-based, possibly fractional) rank is pc/100 * (length + 1). Ranks
-- that fall before the first element or at/after the last one are clamped
-- to the first/last element respectively; ranks in between interpolate
-- linearly between the two neighbouring elements.
--
-- @param arr array of numbers; expected to already be sorted ascending
--   (this function does not sort it)
-- @param length number of elements of arr to consider
-- @param pc percentile to compute (nominally 0..100)
-- @return the estimated value, or 0 when there is no sample to return
local function percentile(arr, length, pc)
	local n = pc/100 * (length + 1);
	local k, d = m_floor(n), n%1;
	if k <= 0 then
		-- Rank is before the first element (also covers negative pc).
		return arr[1] or 0;
	elseif k >= length then
		-- Rank is at or past the last element. With length == 0 this would
		-- index arr[0], so fall back to 0 like the lower bound does.
		return arr[length] or 0;
	end
	return arr[k] + d*(arr[k+1] - arr[k]);
end

--- Create a new, independent statistics registry.
--
-- @param config optional table; recognised fields:
--   duration_sample_interval (default 5): one in every this-many events of
--     a distribution/size/duration metric has its value stored as a sample
--     (the 1st, the (interval+1)th, ...). A value of 1 stores every event.
--   duration_max_stored_samples (default 5000): capacity of each metric's
--     sample buffer; once reached, storage wraps around and overwrites
--     from the first slot.
-- @return a table of metric constructors (amount, counter, rate,
--   distribution, sizes, times) and get_stats(). Each constructor registers
--   a collector function under "<name>:<type>" and returns the function
--   used to feed the metric.
local function new_registry(config)
	config = config or {};
	local duration_sample_interval = config.duration_sample_interval or 5;
	local duration_max_samples = config.duration_max_stored_samples or 5000;

	-- Maps "<name>:<type>" to a collector function returning
	-- type, value[, extra_stats].
	local registry = {};

	-- Events per unit of time between `since` and `now`. Returns 0 when no
	-- time has elapsed, instead of dividing by zero (which yields inf/nan).
	local function get_rate(count, since, now)
		local elapsed = now - since;
		if elapsed > 0 then
			return count/elapsed;
		end
		return 0;
	end

	-- Summarise the stored samples of one collection period.
	-- Note: sorts `events` in place; the sorted array is exposed as
	-- `samples` so percentiles can be computed from it later.
	local function get_distribution_stats(events, n_actual_events, since, new_time, units)
		local n_stored_events = #events;
		t_sort(events);
		local sum = 0;
		for i = 1, n_stored_events do
			sum = sum + events[i];
		end

		return {
			samples = events;
			sample_count = n_stored_events;
			count = n_actual_events;
			rate = get_rate(n_actual_events, since, new_time);
			average = n_stored_events > 0 and sum/n_stored_events or 0;
			min = events[1] or 0;
			max = events[n_stored_events] or 0;
			units = units;
		};
	end

	-- Shared state behind distribution-like metrics. Registers the collector
	-- for "<name>:<stat_type>" and returns two functions:
	--   count_event(): counts one event and returns true when its value
	--     should be stored as a sample;
	--   store_sample(value): writes a value into the sample buffer.
	local function new_sampled_metric(name, stat_type, units)
		local events, last_event = {}, 0;
		local n_actual_events = 0;
		local since = time();

		registry[name..":"..stat_type] = function ()
			local new_time = time();
			local stats = get_distribution_stats(events, n_actual_events, since, new_time, units);
			-- Start a fresh collection period.
			events, last_event = {}, 0;
			n_actual_events = 0;
			since = new_time;
			return stat_type, stats.average, stats;
		end;

		local function count_event()
			n_actual_events = n_actual_events + 1;
			-- Equivalent to `n % interval == 1` for intervals > 1, but also
			-- works for an interval of 1 (where `n % 1` is always 0 and the
			-- old test never matched, so nothing was ever sampled).
			return (n_actual_events - 1) % duration_sample_interval == 0;
		end

		local function store_sample(value)
			last_event = (last_event % duration_max_samples) + 1;
			events[last_event] = value;
		end

		return count_event, store_sample;
	end

	local methods;
	methods = {
		-- Gauge: the returned function sets the current value.
		amount = function (name, initial)
			local v = initial or 0;
			registry[name..":amount"] = function () return "amount", v; end
			return function (new_v) v = new_v; end
		end;
		-- Counter: the returned function adds a delta to the current value.
		-- It is reported under the same ":amount" type as a gauge.
		counter = function (name, initial)
			local v = initial or 0;
			registry[name..":amount"] = function () return "amount", v; end
			return function (delta)
				v = v + delta;
			end;
		end;
		-- Rate: the returned function counts one event; collecting reports
		-- events per unit of time since the previous collection and resets.
		rate = function (name)
			local since, n = time(), 0;
			registry[name..":rate"] = function ()
				local t = time();
				local stats = {
					rate = get_rate(n, since, t);
					count = n;
				};
				since, n = t, 0;
				return "rate", stats.rate, stats;
			end;
			return function ()
				n = n + 1;
			end;
		end;
		-- Distribution of arbitrary values: the returned function records
		-- one value (only sampled events are actually stored).
		distribution = function (name, unit, type)
			local count_event, store_sample = new_sampled_metric(name, type or "distribution", unit);
			return function (value)
				if count_event() then
					store_sample(value);
				end
			end;
		end;
		-- Distribution of sizes, reported as type "size" in "bytes".
		sizes = function (name)
			return methods.distribution(name, "bytes", "size");
		end;
		-- Distribution of durations: the returned function starts a
		-- measurement and returns a function that ends it. Events that are
		-- not sampled get a shared no-op so the clock is not read for them.
		times = function (name)
			local count_event, store_sample = new_sampled_metric(name, "duration", "seconds");
			return function ()
				if not count_event() then
					return nop_function;
				end

				local start_time = time();
				return function ()
					store_sample(time() - start_time);
				end
			end;
		end;

		-- Returns the live registry table of collector functions.
		get_stats = function ()
			return registry;
		end;
	};
	return methods;
end

-- Build a table of percentile values from a collected distribution stat.
-- Keys are the percentiles visited when stepping from 1 to 100 in
-- increments of 100/n_buckets (n_buckets defaults to 100).
-- Returns nil plus an error message if `duration` lacks samples/sample_count.
local function get_histogram(duration, n_buckets)
	n_buckets = n_buckets or 100;
	local samples = duration.samples;
	local n_samples = duration.sample_count;
	if not samples or not n_samples then
		return nil, "not a valid distribution stat";
	end

	local buckets = {};
	local step = 100/n_buckets;
	for pc = 1, 100, step do
		buckets[pc] = percentile(samples, n_samples, pc);
	end
	return buckets;
end

-- Compute a single percentile (pc) from a collected distribution stat.
-- Returns nil plus an error message if `duration` lacks samples/sample_count.
local function get_percentile(duration, pc)
	local samples = duration.samples;
	local n_samples = duration.sample_count;
	if not samples or not n_samples then
		return nil, "not a valid distribution stat";
	end
	return percentile(samples, n_samples, pc);
end

-- Module exports.
return {
	new = new_registry;
	get_histogram = get_histogram;
	get_percentile = get_percentile;
}