Skip to content

Commit 36dc9bb

Browse files
Support metrics integration without quantiles
Quantile computation increases performance overhead by nearly 10% when used in statistics. One may want to use statistics with metrics but without quantiles; this patch makes that possible. The patch also bumps the minimal required metrics rock version. `metrics >= 0.9.0` is required to use summary quantiles with age buckets. `metrics >= 0.5.0, < 0.9.0` is unsupported due to a quantile overflow bug [1]. `metrics == 0.9.0` has a bug that does not permit creating a summary collector without quantiles [2]. In fact, a user may use `metrics >= 0.5.0`, `metrics != 0.9.0` to use metrics without quantiles, and `metrics >= 0.9.0` to use metrics with quantiles. But this is confusing, so let's use a single restriction, `metrics >= 0.10.0`, for both cases. 1. tarantool/metrics#235 2. tarantool/metrics#262 Follows up #224
1 parent 3dd1c6f commit 36dc9bb

File tree

10 files changed

+226
-103
lines changed

10 files changed

+226
-103
lines changed

.github/workflows/test_on_push.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ jobs:
2626
- tarantool-version: "2.8"
2727
metrics-version: "0.1.8"
2828
- tarantool-version: "2.8"
29-
metrics-version: "0.9.0"
29+
metrics-version: "0.10.0"
3030
- tarantool-version: "2.8"
3131
coveralls: true
3232
metrics-version: "0.12.0"

README.md

+9-5
Original file line numberDiff line numberDiff line change
@@ -606,18 +606,21 @@ crud.disable_stats()
606606
crud.reset_stats()
607607
```
608608

609-
If [`metrics`](https://github.com/tarantool/metrics) `0.9.0` or greater
609+
If [`metrics`](https://github.com/tarantool/metrics) `0.10.0` or greater
610610
is found, metrics collectors will be used by default to store statistics
611611
instead of local collectors. You can manually choose driver if needed.
612612
```lua
613-
-- Use metrics collectors.
613+
-- Use metrics collectors. (Default if metrics found).
614614
crud.enable_stats({ driver = 'metrics' })
615615

616+
-- Use metrics collectors with 0.99 quantile.
617+
crud.enable_stats({ driver = 'metrics', quantiles = true })
618+
616619
-- Use simple local collectors.
617620
crud.enable_stats({ driver = 'local' })
618621
```
619-
Performance overhead is 3-5% in case of `local` driver and
620-
10-20% in case of `metrics` driver.
622+
Performance overhead is 3-7% in case of `local` driver and
623+
5-10% in case of `metrics` driver, up to 20% for `metrics` with quantiles.
621624

622625
Format is as follows.
623626
```
@@ -668,7 +671,8 @@ Each operation section contains of different collectors
668671
for success calls and error (both error throw and `nil, err`)
669672
returns. `count` is total requests count since instance start
670673
or stats restart. `latency` is 0.99 quantile of request execution
671-
time if `metrics` driver used, otherwise `latency` is total average.
674+
time if `metrics` driver used and quantiles enabled,
675+
otherwise `latency` is total average.
672676
`time` is total time of requests execution.
673677

674678
In [`metrics`](https://www.tarantool.io/en/doc/latest/book/monitoring/)

crud/stats/local_registry.lua

+28-15
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,36 @@
1+
local errors = require('errors')
2+
13
local dev_checks = require('crud.common.dev_checks')
24
local op_module = require('crud.stats.operation')
35
local registry_common = require('crud.stats.registry_common')
46
local stash = require('crud.stats.stash')
57

68
local registry = {}
7-
local internal_registry = stash.get('local_registry')
9+
local internal = stash.get('local_registry')
10+
local StatsLocalError = errors.new_class('StatsLocalError', {capture_stack = false})
811

912
--- Initialize local metrics registry
1013
--
1114
-- Registries are not meant to be used explicitly
1215
-- by users, init is not guaranteed to be idempotent.
1316
--
1417
-- @function init
18+
-- @tparam table opts
19+
--
20+
-- @tfield boolean quantiles
21+
-- Quantiles are not supported for the local registry; only `false` is valid.
1522
--
1623
-- @treturn boolean Returns true.
1724
--
18-
function registry.init()
19-
internal_registry.spaces = {}
20-
internal_registry.space_not_found = 0
25+
function registry.init(opts)
26+
dev_checks({ quantiles = 'boolean' })
27+
28+
StatsLocalError:assert(opts.quantiles == false,
29+
"Quantiles are not supported for 'local' statistics registry")
30+
31+
internal.registry = {}
32+
internal.registry.spaces = {}
33+
internal.registry.space_not_found = 0
2134

2235
return true
2336
end
@@ -32,7 +45,7 @@ end
3245
-- @treturn boolean Returns true.
3346
--
3447
function registry.destroy()
35-
internal_registry = stash.reset('local_registry')
48+
internal.registry = nil
3649

3750
return true
3851
end
@@ -58,10 +71,10 @@ function registry.get(space_name)
5871
dev_checks('?string')
5972

6073
if space_name ~= nil then
61-
return table.deepcopy(internal_registry.spaces[space_name]) or {}
74+
return table.deepcopy(internal.registry.spaces[space_name]) or {}
6275
end
6376

64-
return table.deepcopy(internal_registry)
77+
return table.deepcopy(internal.registry)
6578
end
6679

6780
--- Check if space statistics are present in registry
@@ -76,7 +89,7 @@ end
7689
function registry.is_unknown_space(space_name)
7790
dev_checks('string')
7891

79-
return internal_registry.spaces[space_name] == nil
92+
return internal.registry.spaces[space_name] == nil
8093
end
8194

8295
--- Increase requests count and update latency info
@@ -101,8 +114,8 @@ end
101114
function registry.observe(latency, space_name, op, status)
102115
dev_checks('number', 'string', 'string', 'string')
103116

104-
registry_common.init_collectors_if_required(internal_registry.spaces, space_name, op)
105-
local collectors = internal_registry.spaces[space_name][op][status]
117+
registry_common.init_collectors_if_required(internal.registry.spaces, space_name, op)
118+
local collectors = internal.registry.spaces[space_name][op][status]
106119

107120
collectors.count = collectors.count + 1
108121
collectors.time = collectors.time + latency
@@ -118,7 +131,7 @@ end
118131
-- @treturn boolean Returns true.
119132
--
120133
function registry.observe_space_not_found()
121-
internal_registry.space_not_found = internal_registry.space_not_found + 1
134+
internal.registry.space_not_found = internal.registry.space_not_found + 1
122135

123136
return true
124137
end
@@ -142,8 +155,8 @@ function registry.observe_fetch(tuples_fetched, tuples_lookup, space_name)
142155
dev_checks('number', 'number', 'string')
143156

144157
local op = op_module.SELECT
145-
registry_common.init_collectors_if_required(internal_registry.spaces, space_name, op)
146-
local collectors = internal_registry.spaces[space_name][op].details
158+
registry_common.init_collectors_if_required(internal.registry.spaces, space_name, op)
159+
local collectors = internal.registry.spaces[space_name][op].details
147160

148161
collectors.tuples_fetched = collectors.tuples_fetched + tuples_fetched
149162
collectors.tuples_lookup = collectors.tuples_lookup + tuples_lookup
@@ -167,8 +180,8 @@ function registry.observe_map_reduces(count, space_name)
167180
dev_checks('number', 'string')
168181

169182
local op = op_module.SELECT
170-
registry_common.init_collectors_if_required(internal_registry.spaces, space_name, op)
171-
local collectors = internal_registry.spaces[space_name][op].details
183+
registry_common.init_collectors_if_required(internal.registry.spaces, space_name, op)
184+
local collectors = internal.registry.spaces[space_name][op].details
172185

173186
collectors.map_reduces = collectors.map_reduces + count
174187

0 commit comments

Comments
 (0)