From 36dba7a684208a1bd1b0decb33ea3553405b4187 Mon Sep 17 00:00:00 2001 From: Anthony Molinaro Date: Mon, 1 Jul 2019 21:49:02 +0000 Subject: [PATCH 1/4] Initial Stab at prometheus exporter --- ChangeLog | 6 + Makefile | 2 +- include/mondemand.hrl | 3 +- mondemand_dev.config | 1 + rebar.config | 8 +- src/mondemand.erl | 268 ++++++++++++++++++++++++-------- src/mondemand_config.erl | 29 +++- src/mondemand_httpd.erl | 22 +++ src/mondemand_statdb.erl | 310 ++++++++++++++++--------------------- src/mondemand_statsmsg.erl | 109 +++++++++++-- src/mondemand_sup.erl | 23 ++- src/mondemand_util.erl | 12 ++ 12 files changed, 530 insertions(+), 263 deletions(-) create mode 100644 src/mondemand_httpd.erl diff --git a/ChangeLog b/ChangeLog index b26abef..ed4923b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +* Mon Jul 01 2019 Anthony Molinaro 7.0.0 +- Added extra field to #md_metric record for description (NOT BACKWARD +COMPATIBLE if using this record) +- Moved many functions from mondemand_statdb module to mondemand module +- Added optional http server with prometheus exporter + * Tue Jun 18 2019 Anthony Molinaro 6.12.1 - rename some confusing names - get mondemand_statdb:config() to print out parts of md_config table diff --git a/Makefile b/Makefile index 97b719c..a6d74d2 100644 --- a/Makefile +++ b/Makefile @@ -25,6 +25,6 @@ clean: if test -d _build; then $(REBAR3) clean; fi maintainer-clean: clean - rm -rf _build + rm -rf _build deps ebin tmp _checkouts/*/ebin .PHONY: all test name version clean maintainer-clean diff --git a/include/mondemand.hrl b/include/mondemand.hrl index bf2f120..e751aa5 100644 --- a/include/mondemand.hrl +++ b/include/mondemand.hrl @@ -52,7 +52,8 @@ }). -record (md_metric, { type, key, - value + value, + description }). -record (md_statset, { count, sum, diff --git a/mondemand_dev.config b/mondemand_dev.config index 45b877e..16508d2 100644 --- a/mondemand_dev.config +++ b/mondemand_dev.config @@ -7,6 +7,7 @@ % { config_file, "mondemand.conf" }, % , { vmstats, [ { program_id, mondemand_erlang } ] } % { send_interval, 5 } + , { httpd, [ {port, 8082} ] } ] } ]. diff --git a/rebar.config b/rebar.config index b0a0c11..42e0b43 100644 --- a/rebar.config +++ b/rebar.config @@ -3,11 +3,9 @@ %% enable coverage output when running eunit { cover_enabled, true }. -{ cover_export_enabled, true }. { cover_opts, [verbose] }. -{ cover_print_enabled, true }. -%% always include debug info so AST is included in beams +%% always include debug_info so AST is included in beams {erl_opts, [ debug_info, @@ -15,11 +13,13 @@ ] }. +{dialyzer, [ {warnings, [no_unused]} ]}. + {clean_files, ["ebin", "doc"]}. {deps, [ { lwes, {git, "git://github.com/lwes/lwes-erlang.git", {tag, "5.0.0"}} }, - { parse_trans, {git, "git://github.com/uwiger/parse_trans.git", {tag, "3.2.0"} } } + { parse_trans, {git, "git://github.com/uwiger/parse_trans.git", {tag, "3.3.0"} } } ] }. diff --git a/src/mondemand.erl b/src/mondemand.erl index 3c0c353..265b95e 100644 --- a/src/mondemand.erl +++ b/src/mondemand.erl @@ -34,53 +34,93 @@ -behaviour (gen_server). %% API --export ( [start_link/0, - get_state/0, - - % stats functions - % counters - increment/2, % (ProgId, Key) - increment/3, % (ProgId, Key, Amount | [{ContextKey,ContextValue}]) - increment/4, % (ProgId, Key, Amount | [{ContextKey,ContextValue}], [{ContextKey,ContextValue}]| Amount ) - % gauges - set/3, % (ProgId, Key, Value) - set/4, % (ProgId, Key, [{ContextKey,ContextValue}], Value) - % statsets - add_sample/3, % (ProgId, Key, Value) - add_sample/4, % (ProgId, Key, [{ContextKey,ContextValue}], Value) - % counters that emit metrics as gauges - gincrement/2, % (ProgId, Key) - gincrement/3, % (ProgId, Key, Increment) - gincrement/4, % (ProgId, Key, [{ContextKey,ContextValue}], Increment) - - % tracing functions - send_trace/3, - send_trace/5, - - % performance tracing functions - send_perf_info/3, % (Id, CallerLabel, Timings) - send_perf_info/4, % (Id, CallerLabel, Timings, [{ContextKey,ContextValue}]) - send_perf_info/5, % (Id, CallerLabel, Label, StartTime, StopTime) - send_perf_info/6, % (Id, CallerLabel, Label, StartTime, StopTime, [{ContextKey,ContextValue}]) - - % annotation functions - send_annotation/4, % (Id, Timestamp, Description, Text) - send_annotation/5, % (Id, Timestamp, Description, Text, [Tag]) - send_annotation/6, % (Id, Timestamp, Description, Text, [Tag],[{ContextKey,ContextValue}]) - - % other functions - send_stats/3, - flush_state_init/0, - flush_one_stat/2, - reset_stats/0, - stats/0, - all/0, - all_event_names_as_binary/0, - get_lwes_config/0, - reload_config/0, - current_config/0, - restart/0 - ]). +-export ([ + start_link/0, + + % ------------ stats functions ----------- + % + % ============ counters ================= + create_counter/2, % (ProgId, Key) + create_counter/3, % (ProgId, Key, [{CtxtKey,CtxtVal}] | Desc) + create_counter/4, % (ProgId, Key, [{CtxtKey,CtxtVal}], Desc) + create_counter/5, % (ProgId, Key, [{CtxtKey,CtxtVal}], Desc, InitialAmount) + increment/2, % (ProgId, Key) + increment/3, % (ProgId, Key, Amount | [{CtxtKey,CtxtValue}]) + increment/4, % (ProgId, Key, Amount | [{CtxtKey,CtxtValue}], [{CtxtKey,CtxtValue}]| Amount ) + fetch_counter/2, % (ProgId, Key) + fetch_counter/3, % (ProgId, Key, [{CtxtKey,CtxtValue}]) + remove_counter/2, % (ProgId, Key) + remove_counter/3, % (ProgId, Key, [{CtxtKey,CtxtValue}]) + + % ============ gauges ================= + create_gauge/2, % (ProgId, Key) + create_gauge/3, % (ProgId, Key, [{CtxtKey,CtxtVal}] | Desc) + create_gauge/4, % (ProgId, Key, [{CtxtKey,CtxtVal}], Desc) + create_gauge/5, % (ProgId, Key, [{CtxtKey,CtxtVal}], Desc, InitialAmount) + set/3, % (ProgId, Key, Value) + set/4, % (ProgId, Key, [{ContextKey,ContextValue}], Value) + fetch_gauge/2, % (ProgId, Key) + fetch_gauge/3, % (ProgId, Key, [{CtxtKey,CtxtValue}]) + remove_gauge/2, % (ProgId, Key) + remove_gauge/3, % (ProgId, Key, [{CtxtKey,CtxtValue}]) + + % ============ statsets ================= + create_sample_set/2, % (ProgId, Key) + create_sample_set/3, % (ProgId, Key, [{CtxtKey,CtxtVal}] | Desc) + create_sample_set/4, % (ProgId, Key, [{CtxtKey,CtxtVal}], Desc) + create_sample_set/5, % (ProgId, Key, [{CtxtKey,CtxtVal}], Desc, MaxSamples) + create_sample_set/6, % (ProgId, Key, [{CtxtKey,CtxtVal}], Desc, MaxSamples, StatTypeToSend) + + add_sample/3, % (ProgId, Key, Value) + add_sample/4, % (ProgId, Key, [{CtxtKey,CtxtValue}], Value) + fetch_sample_set/2, % (ProgId, Key) + fetch_sample_set/3, % (ProgId, Key, [{CtxtKey,CtxtValue}]) + remove_sample_set/2, % (ProgId, Key) + remove_sample_set/3, % (ProgId, Key, [{CtxtKey,CtxtValue}]) + + % ============ gcounters ================= + % counters that emit metrics as gauges + create_gcounter/2, % (ProgId, Key) + create_gcounter/3, % (ProgId, Key, [{CtxtKey,CtxtVal}] | Desc) + create_gcounter/4, % (ProgId, Key, [{CtxtKey,CtxtVal}], Desc) + create_gcounter/5, % (ProgId, Key, [{CtxtKey,CtxtVal}], Desc, InitialAmount) + gincrement/2, % (ProgId, Key) + gincrement/3, % (ProgId, Key, Increment) + gincrement/4, % (ProgId, Key, [{ContextKey,ContextValue}], Increment) + fetch_gcounter/2, % (ProgId, Key) + fetch_gcounter/3, % (ProgId, Key, [{CtxtKey,CtxtValue}]) + remove_gcounter/2, % (ProgId, Key) + remove_gcounter/3, % (ProgId, Key, [{CtxtKey,CtxtValue}]) + + % ------------ tracing functions ----------- + send_trace/3, + send_trace/5, + + % ------------ performance tracing functions ----------- + send_perf_info/3, % (Id, CallerLabel, Timings) + send_perf_info/4, % (Id, CallerLabel, Timings, [{ContextKey,ContextValue}]) + send_perf_info/5, % (Id, CallerLabel, Label, StartTime, StopTime) + send_perf_info/6, % (Id, CallerLabel, Label, StartTime, StopTime, [{ContextKey,ContextValue}]) + + % ------------ annotation functions ----------- + send_annotation/4, % (Id, Timestamp, Description, Text) + send_annotation/5, % (Id, Timestamp, Description, Text, [Tag]) + send_annotation/6, % (Id, Timestamp, Description, Text, [Tag],[{ContextKey,ContextValue}]) + + % ------------ other functions ----------- + send_stats/3, + flush_state_init/0, + flush_one_stat/2, + reset_stats/0, + stats/0, + export_as_prometheus/0, + all/0, + all_event_names_as_binary/0, + get_lwes_config/0, + reload_config/0, + current_config/0, + restart/0 +]). %% gen_server callbacks -export ( [ init/1, @@ -106,31 +146,95 @@ start_link() -> gen_server:start_link ({local, ?MODULE}, ?MODULE, [], []). -get_state() -> - gen_server:call (?MODULE, get_state). +create_counter (ProgId, Key) -> + create_counter (ProgId, Key, [], "", 0). +create_counter (ProgId, Key, Context = [{_,_}|_]) -> + create_counter (ProgId, Key, Context, "", 0); +create_counter (ProgId, Key, Description) + when is_list(Description) ; is_binary(Description) -> + create_counter (ProgId, Key, [], Description, 0). +create_counter (ProgId, Key, Context = [{_,_}|_], Description) -> + create_counter (ProgId, Key, Context, Description, 0). +create_counter (ProgId, Key, Context, Description, InitialAmount) -> + mondemand_statdb:create_counter (ProgId, Key, Context, Description, InitialAmount). increment (ProgId, Key) -> increment (ProgId, Key, [], 1). - increment (ProgId, Key, Context) when is_list (Context) -> increment (ProgId, Key, Context, 1); increment (ProgId, Key, Amount) when is_integer (Amount) -> increment (ProgId, Key, [], Amount). - % this first clause is just for legacy systems increment (ProgId, Key, Amount, Context) when is_integer (Amount), is_list (Context) -> increment (ProgId, Key, Context, Amount); increment (ProgId, Key, Context, Amount) when is_integer (Amount), is_list (Context) -> - mondemand_statdb:increment (ProgId, Key, Context, Amount). + mondemand_statdb:increment_counter (ProgId, Key, Context, Amount). + +fetch_counter (ProgId, Key) -> + fetch_counter (ProgId, Key, []). +fetch_counter (ProgId, Key, Context) -> + mondemand_statdb:fetch_counter (ProgId, Key, Context). + +remove_counter (ProgId, Key) -> + remove_counter (ProgId, Key, []). +remove_counter (ProgId, Key, Context) -> + mondemand_statdb:remove_counter (ProgId, Key, Context). + +create_gauge (ProgId, Key) -> + create_gauge (ProgId, Key, [], "", 0). +create_gauge (ProgId, Key, Context = [{_,_}|_]) -> + create_gauge (ProgId, Key, Context, "", 0); +create_gauge (ProgId, Key, Description) + when is_list(Description) ; is_binary(Description) -> + create_gauge (ProgId, Key, [], Description, 0). +create_gauge (ProgId, Key, Context, Description) -> + create_gauge (ProgId, Key, Context, Description, 0). +create_gauge (ProgId, Key, Context, Description, InitialAmount) + when is_integer (InitialAmount), is_list (Context) -> + mondemand_statdb:create_gauge (ProgId, Key, Context, Description, InitialAmount). set (ProgId, Key, Amount) when is_integer (Amount) -> set (ProgId, Key, [], Amount). - set (ProgId, Key, Context, Amount) when is_integer (Amount), is_list (Context) -> - mondemand_statdb:set (ProgId, Key, Context, Amount). + mondemand_statdb:set_gauge (ProgId, Key, Context, Amount). + +fetch_gauge (ProgId, Key) -> + fetch_gauge (ProgId, Key, []). +fetch_gauge (ProgId, Key, Context) -> + mondemand_statdb:fetch_gauge (ProgId, Key, Context). + +remove_gauge (ProgId, Key) -> + remove_gauge (ProgId, Key, []). +remove_gauge (ProgId, Key, Context) -> + mondemand_statdb:remove_gauge (ProgId, Key, Context). + +create_sample_set (ProgId, Key) -> + create_sample_set (ProgId, Key, [], "", + mondemand_config:default_max_sample_size(), + mondemand_config:default_stats()). +create_sample_set (ProgId, Key, Context = [{_,_}|_]) -> + create_sample_set (ProgId, Key, Context, "", + mondemand_config:default_max_sample_size(), + mondemand_config:default_stats()); +create_sample_set (ProgId, Key, Description) + when is_list(Description) ; is_binary(Description) -> + create_sample_set (ProgId, Key, [], Description, + mondemand_config:default_max_sample_size(), + mondemand_config:default_stats()). +create_sample_set (ProgId, Key, Context, Description) -> + create_sample_set (ProgId, Key, Context, Description, + mondemand_config:default_max_sample_size(), + mondemand_config:default_stats()). +create_sample_set (ProgId, Key, Context, Description, Max) -> + create_sample_set (ProgId, Key, Context, Description, + Max, + mondemand_config:default_stats()). +% Stats can be set to 'all' to include all available summary statistics +create_sample_set (ProgId, Key, Context, Description, Max, Stats) -> + mondemand_statdb:create_sample_set (ProgId, Key, Context, Description, Max, Stats). add_sample (ProgId, Key, Value) -> add_sample (ProgId, Key, [], Value). @@ -138,18 +242,48 @@ add_sample (ProgId, Key, Context, Value) when is_integer (Value), is_list (Context) -> mondemand_statdb:add_sample (ProgId, Key, Context, Value). +fetch_sample_set (ProgId, Key) -> + fetch_sample_set (ProgId, Key, []). +fetch_sample_set (ProgId, Key, Context) -> + mondemand_statdb:fetch_sample_set (ProgId, Key, Context). + +remove_sample_set (ProgId, Key) -> + remove_sample_set (ProgId, Key, []). +remove_sample_set (ProgId, Key, Context) -> + mondemand_statdb:remove_sample_set (ProgId, Key, Context). + +create_gcounter (ProgId, Key) -> + create_gcounter (ProgId, Key, [], "", 0). +create_gcounter (ProgId, Key, Context = [{_,_}|_]) -> + create_gcounter (ProgId, Key, Context, "", 0); +create_gcounter (ProgId, Key, Description) + when is_list(Description) ; is_binary(Description) -> + create_gcounter (ProgId, Key, [], Description, 0). +create_gcounter (ProgId, Key, Context = [{_,_}|_], Description) -> + create_gcounter (ProgId, Key, Context, Description, 0). +create_gcounter (ProgId, Key, Context, Description, InitialAmount) + when is_list(Context), is_integer(InitialAmount) -> + mondemand_statdb:create_gcounter (ProgId, Key, Context, Description, InitialAmount). + gincrement (ProgId, Key) -> gincrement (ProgId, Key, [], 1). - gincrement (ProgId, Key, Context) when is_list (Context) -> gincrement (ProgId, Key, Context, 1); gincrement (ProgId, Key, Amount) when is_integer (Amount) -> gincrement (ProgId, Key, [], Amount). - gincrement (ProgId, Key, Context, Amount) when is_list (Context), is_integer (Amount) -> - mondemand_statdb:gincrement (ProgId, Key, Context, Amount). + mondemand_statdb:increment_gcounter (ProgId, Key, Context, Amount). + +fetch_gcounter (ProgId, Key) -> + fetch_gcounter (ProgId, Key, []). +fetch_gcounter (ProgId, Key, Context) -> + mondemand_statdb:fetch_gcounter (ProgId, Key, Context). +remove_gcounter (ProgId, Key) -> + remove_gcounter (ProgId, Key, []). +remove_gcounter (ProgId, Key, Context) -> + mondemand_statdb:remove_gcounter (ProgId, Key, Context). all () -> mondemand_statdb:all(). @@ -162,6 +296,17 @@ get_lwes_config () -> stats () -> mondemand_statdb:all(). +export_as_prometheus () -> + % we export a 'minute ago' which actually just effects statsets. This uses + % the previous minutes statset db which will be complete and will export that + % time as well + mondemand_statdb:map_then ( + fun (S,A) -> + [mondemand_statsmsg:to_prometheus(S) | A] + end, + [], + 1). + trace_info_in_dict (Dict) -> trace_owner_in_dict (Dict) andalso trace_id_in_dict (Dict). @@ -371,8 +516,10 @@ init([]) -> FlushConfig = mondemand_config:flush_config(), % initialize a few internal counters - increment(mondemand_erlang, flush_count, 0), - increment(mondemand_erlang, flush_total_millis, 0), + create_counter(mondemand_erlang, flush_count, [], + "The total number of times events have been flushed", 0), + create_counter(mondemand_erlang, flush_total_millis, [], + "The total amount of time in milliseconds taken to flush", 0), case mondemand_config:lwes_config () of {error, Error} -> @@ -412,9 +559,6 @@ init([]) -> end end. -% just return the state -handle_call (get_state, _From, State) -> - {reply, State, State}; % restart with currently assigned config handle_call (restart, _From, State = #state { config = Config, diff --git a/src/mondemand_config.erl b/src/mondemand_config.erl index 3bc8300..91d9413 100644 --- a/src/mondemand_config.erl +++ b/src/mondemand_config.erl @@ -28,6 +28,10 @@ max_metrics/0, parse_config/1, get_http_config/0, + httpd_enabled/0, + httpd_port/0, + httpd_address/0, + httpd_metrics_endpoint/0, vmstats_enabled/0, vmstats_prog_id/0, vmstats_context/0, @@ -300,6 +304,21 @@ get_http_config () -> end end. +httpd_enabled () -> + case application:get_env (mondemand, httpd) of + {ok, L} when is_list (L) -> true; + _ -> false + end. + +httpd_port () -> + get_config_with_default (httpd, port, 31337). + +httpd_address () -> + get_config_with_default (httpd, bind_address, "0.0.0.0"). + +httpd_metrics_endpoint () -> + get_config_with_default (httpd, metrics_endpoint, "/md/metrics"). + vmstats_enabled () -> case application:get_env (mondemand, vmstats) of {ok, L} when is_list (L) -> @@ -312,11 +331,11 @@ vmstats_enabled () -> end. vmstats_prog_id () -> - vmstats_config (program_id, undefined). + get_config_with_default (vmstats, program_id, undefined). vmstats_context () -> - vmstats_config (context, []). + get_config_with_default (vmstats, context, []). vmstats_disable_scheduler_wall_time () -> - vmstats_config (disable_scheduler_wall_time, false). + get_config_with_default (vmstats, disable_scheduler_wall_time, false). vmstats_legacy_workaround () -> case application:get_env(mondemand,r15b_workaround) of @@ -324,8 +343,8 @@ vmstats_legacy_workaround () -> _ -> false end. -vmstats_config (K, Default) -> - case application:get_env (mondemand, vmstats) of +get_config_with_default (M, K, Default) -> + case application:get_env (mondemand, M) of {ok, L} when is_list (L) -> case proplists:get_value (K, L) of undefined -> Default; diff --git a/src/mondemand_httpd.erl b/src/mondemand_httpd.erl new file mode 100644 index 0000000..9d36279 --- /dev/null +++ b/src/mondemand_httpd.erl @@ -0,0 +1,22 @@ +-module(mondemand_httpd). + +-export([do/1]). + +-include_lib("inets/include/httpd.hrl"). + +-define(SERVER_NAME, "Mondemand metrics."). + +do(#mod { method = Method, request_uri = URI }) -> + case URI =:= mondemand_config:httpd_metrics_endpoint() + andalso Method =:= "GET" of + true -> + Body = mondemand:export_as_prometheus(), + ContentLength = integer_to_list(iolist_size(Body)), + RespHeaders = [{"accept","text/plain"}, + {code, 200}, + {content_type, "text/plain"}, + {content_length,ContentLength}], + {break,[{response, {response, RespHeaders, [Body]}}]}; + false -> + {break,[{response, {404,""}}]} + end. diff --git a/src/mondemand_statdb.erl b/src/mondemand_statdb.erl index db3f51c..0b1a0d4 100644 --- a/src/mondemand_statdb.erl +++ b/src/mondemand_statdb.erl @@ -42,59 +42,43 @@ %% API -export([ start_link/0, - get_state/0, % counter functions - create_counter/2, - create_counter/3, - create_counter/4, create_counter/5, - increment/2, - increment/3, - increment/4, - fetch_counter/2, + increment_counter/4, fetch_counter/3, - remove_counter/2, remove_counter/3, % gcounter functions - create_gcounter/4, - gincrement/4, - fetch_gcounter/2, + create_gcounter/5, + increment_gcounter/4, fetch_gcounter/3, + remove_gcounter/3, % gauge functions - create_gauge/2, - create_gauge/3, - create_gauge/4, create_gauge/5, - set/3, - set/4, - fetch_gauge/2, + set_gauge/4, fetch_gauge/3, - remove_gauge/2, remove_gauge/3, % sample set functions - create_sample_set/2, - create_sample_set/3, - create_sample_set/4, - create_sample_set/5, create_sample_set/6, - add_sample/3, add_sample/4, - fetch_sample_set/2, fetch_sample_set/3, fetch_sample_set/4, - remove_sample_set/2, remove_sample_set/3, - all_sample_set_stats/0, + % mapping functions, used to iterate over all stats, when now is used + % the current live dbs for all types are used, when then functions + % are used the db for however many minutes ago is used. map_now/1, + map_now/2, map_then/2, + map_then/3, map/4, + description/1, flush/3, config/0, all/0, @@ -146,46 +130,25 @@ start_link() -> gen_server:start_link ({local, ?MODULE}, ?MODULE, [], []). -get_state() -> - gen_server:call (?MODULE, get_state). - -create_counter (ProgId, Key) -> - create_counter (ProgId, Key, [], "", 0). -create_counter (ProgId, Key, Description) -> - create_counter (ProgId, Key, [], Description, 0). -create_counter (ProgId, Key, Context, Description) -> - create_counter (ProgId, Key, Context, Description, 0). -create_counter (ProgId, Key, Context, Description, Amount) - when is_integer (Amount), is_list (Context) -> +create_counter (ProgId, Key, Context, Description, InitialAmount) + when is_integer (InitialAmount), is_list (Context) -> InternalKey = calculate_key (ProgId, Context, counter, Key), add_new_config (InternalKey, Description), case ets:insert_new (?STATS_TABLE, - #md_metric {key = InternalKey, value = Amount}) of + #md_metric {key = InternalKey, value = InitialAmount}) of true -> ok; false -> {error, already_created} end. -increment (ProgId, Key) -> - increment (ProgId, Key, [], 1). -increment (ProgId, Key, Amount) - when is_integer (Amount) -> - increment (ProgId, Key, [], Amount); -increment (ProgId, Key, Context) - when is_list (Context) -> - increment (ProgId, Key, Context, 1). -increment (ProgId, Key, Context, Amount) +increment_counter (ProgId, Key, Context, Amount) when is_integer (Amount), is_list (Context) -> InternalKey = calculate_key (ProgId, Context, counter, Key), try_update_counter (InternalKey, Amount). -fetch_counter (ProgId, Key) -> - fetch_counter (ProgId, Key, []). fetch_counter (ProgId, Key, Context) -> InternalKey = calculate_key (ProgId, Context, counter, Key), return_if_exists (InternalKey, ?STATS_TABLE). -remove_counter (ProgId, Key) -> - remove_counter (ProgId, Key, []). remove_counter (ProgId, Key, Context) -> InternalKey = calculate_key (ProgId, Context, counter, Key), remove_metric (InternalKey, ?STATS_TABLE). @@ -240,14 +203,15 @@ try_update_counter (InternalKey = previous_value :: non_neg_integer(), previous_time :: integer() }). -create_gcounter (ProgId, Key, Context, Amount) - when is_list(Context), is_integer(Amount) -> - create_gcounter (calculate_key (ProgId, Context, gcounter, Key), Amount). +create_gcounter (ProgId, Key, Context, Description, InitialAmount) + when is_list(Context), is_integer(InitialAmount) -> + InternalKey = calculate_key (ProgId, Context, gcounter, Key), + create_gcounter_internal (InternalKey, Description, InitialAmount). -create_gcounter (InternalKey, Amount) -> - add_new_config (InternalKey, ""), +create_gcounter_internal (InternalKey, Description, InitialAmount) -> + add_new_config (InternalKey, Description), NewGCounter = #md_gcounter{ key = InternalKey, - value = Amount, + value = InitialAmount, previous_value = 0, previous_time = erlang:monotonic_time() }, case ets:insert_new (?STATS_TABLE, NewGCounter) of @@ -255,7 +219,7 @@ create_gcounter (InternalKey, Amount) -> false -> {error, already_created} end. -gincrement (ProgId, Key, Context, Amount) +increment_gcounter (ProgId, Key, Context, Amount) when is_integer (Amount), is_list (Context) -> InternalKey = calculate_key (ProgId, Context, gcounter, Key), update_gcounter (InternalKey, Amount). @@ -270,37 +234,31 @@ update_gcounter (InternalKey, Amount) -> %% default object to the original ets:update_counter call so that we can %% create the config row. error:badarg -> - case create_gcounter (InternalKey, Amount) of + case create_gcounter_internal (InternalKey, "", Amount) of ok -> {ok, Amount}; {error, already_created} -> {ok, update_counter (InternalKey, Amount)} end end. -fetch_gcounter (ProgId, Key) -> - fetch_gcounter (ProgId, Key, []). fetch_gcounter (ProgId, Key, Context) -> InternalKey = calculate_key (ProgId, Context, gcounter, Key), return_if_exists (InternalKey, ?STATS_TABLE). +remove_gcounter (ProgId, Key, Context) -> + InternalKey = calculate_key (ProgId, Context, gcounter, Key), + remove_metric (InternalKey, ?STATS_TABLE). -create_gauge (ProgId, Key) -> - create_gauge (ProgId, Key, [], "", 0). -create_gauge (ProgId, Key, Description) -> - create_gauge (ProgId, Key, [], Description, 0). -create_gauge (ProgId, Key, Context, Description) -> - create_gauge (ProgId, Key, Context, Description, 0). -create_gauge (ProgId, Key, Context, Description, Amount) -> +create_gauge (ProgId, Key, Context, Description, InitialAmount) + when is_integer (InitialAmount), is_list (Context) -> InternalKey = calculate_key (ProgId, Context, gauge, Key), add_new_config (InternalKey, Description), case ets:insert_new (?STATS_TABLE, - #md_metric {key = InternalKey, value = Amount}) of + #md_metric {key = InternalKey, value = InitialAmount}) of true -> ok; false -> {error, already_created} end. -set (ProgId, Key, Amount) -> - set (ProgId, Key, [], Amount). -set (ProgId, Key, Context, Amount) -> +set_gauge (ProgId, Key, Context, Amount) -> InternalKey = calculate_key (ProgId, Context, gauge, Key), % if we would overflow a gauge, instead of going negative just leave it @@ -328,14 +286,10 @@ set (ProgId, Key, Context, Amount) -> end end. -fetch_gauge (ProgId, Key) -> - fetch_gauge (ProgId, Key, []). fetch_gauge (ProgId, Key, Context) -> InternalKey = calculate_key (ProgId, Context, gauge, Key), return_if_exists (InternalKey, ?STATS_TABLE). -remove_gauge (ProgId, Key) -> - remove_gauge (ProgId, Key, []). remove_gauge (ProgId, Key, Context) -> InternalKey = calculate_key (ProgId, Context, gauge, Key), remove_metric (InternalKey, ?STATS_TABLE). @@ -364,22 +318,9 @@ try_update_gauge (InternalKey = end end. -create_sample_set (ProgId, Key) -> - create_sample_set (ProgId, Key, [], "", - mondemand_config:default_max_sample_size(), - mondemand_config:default_stats()). -create_sample_set (ProgId, Key, Description) -> - create_sample_set (ProgId, Key, [], Description, - mondemand_config:default_max_sample_size(), - mondemand_config:default_stats()). -create_sample_set (ProgId, Key, Context, Description) -> - create_sample_set (ProgId, Key, Context, Description, - mondemand_config:default_max_sample_size(), - mondemand_config:default_stats()). -create_sample_set (ProgId, Key, Context, Description, Max) -> - create_sample_set (ProgId, Key, Context, Description, - Max, - mondemand_config:default_stats()). +create_sample_set (ProgId, Key, Context, Description, Max, all) -> + create_sample_set (ProgId, Key, Context, Description, Max, + all_sample_set_stats()); create_sample_set (ProgId, Key, Context, Description, Max, Stats) -> InternalKey = calculate_key (ProgId, Context, statset, Key), create_sample_set_internal (minute_tab (mondemand_util:current_minute()), @@ -429,9 +370,6 @@ try_update_sampleset (CurrentMinuteStatsSetTable, InternalKey, Value) -> [M, UC] end. -add_sample (ProgId, Key, Value) -> - add_sample (ProgId, Key, [], Value). - % this implements reservoir sampling of values % http://en.wikipedia.org/wiki/Reservoir_sampling % in an ets table @@ -473,9 +411,6 @@ add_sample (ProgId, Key, Context, Value) -> I -> ets:update_element (Tid, InternalKey, {?STATSET_SUM_INDEX+I,Value}) end. -fetch_sample_set (ProgId, Key) -> - fetch_sample_set (ProgId, Key, []). - fetch_sample_set (ProgId, Key, Context) -> fetch_sample_set (ProgId, Key, Context, mondemand_util:current_minute()). @@ -484,8 +419,6 @@ fetch_sample_set (ProgId, Key, Context, Minute) -> CurrentMinuteStatsSetTable= minute_tab (Minute), return_if_exists (InternalKey, CurrentMinuteStatsSetTable). -remove_sample_set (ProgId, Key) -> - remove_sample_set (ProgId, Key, []). remove_sample_set (ProgId, Key, Context) -> InternalKey = calculate_key (ProgId, Context, statset, Key), CurrentMinuteStatsSetTable = minute_tab (mondemand_util:current_minute()), @@ -518,6 +451,12 @@ lookup_default_config () -> [] -> undefined end. +description(InternalKey) -> + case ets:lookup (?CONFIG_TABLE, InternalKey) of + [] -> ""; + [#config { description = D}] -> D + end. + add_new_config (Key, Description) -> C = lookup_default_config (), NewConfig = C#config { key = Key, @@ -557,16 +496,20 @@ type_to_single_char (gcounter) -> <<"r">>; type_to_single_char (statset) -> <<"s">>. map_now (Function) -> - CurrentMinuteMillis = mondemand_util:current(), - StatsSetTable = minute_tab (mondemand_util:current_minute()), - map (Function, ok, CurrentMinuteMillis, StatsSetTable). + map_now (Function, ok). + +map_now (Function, InitialState) -> + map_then (Function, InitialState, 0). map_then (Function, Ago) -> + map_then (Function, ok, Ago). + +map_then (Function, InitialState, Ago) -> CurrentMinuteMillis = mondemand_util:current(), PreviousMinuteMillis = CurrentMinuteMillis - 60000 * Ago, PreviousMinute = minutes_ago (mondemand_util:current_minute(), Ago), StatsSetTable = minute_tab (PreviousMinute), - map (Function, ok, PreviousMinuteMillis, StatsSetTable). + map (Function, InitialState, PreviousMinuteMillis, StatsSetTable). % I want to iterate over the config table, collapsing all metrics for a % particular program id and context into a group so they can all be processed @@ -665,33 +608,40 @@ lookup_metric (InternalKey = #mdkey {type = Type, key = Key}, CurrentMinuteStatsSetTable) -> case Type of I when I =:= counter; I =:= gauge; I =:= gcounter -> + Description = description(InternalKey), case ets:lookup (?STATS_TABLE, InternalKey) of [] -> #md_metric { key = mondemand_util:binaryify (Key), type = I, - value = 0 }; + value = 0, + description = Description }; [#md_metric {value = V}] -> #md_metric { key = mondemand_util:binaryify (Key), type = I, - value = V }; + value = V, + description = Description }; [#md_gcounter {rate = V}] -> #md_metric { key = mondemand_util:binaryify (Key), type = gauge, - value = V } + value = V, + description = Description } end; I when I =:= statset -> - #config { statistics = Stats } = lookup_config (InternalKey), + #config { statistics = Stats, description = Description } + = lookup_config (InternalKey), case ets:lookup (CurrentMinuteStatsSetTable, InternalKey) of [] -> % special case, for filling out an empty statset #md_metric { key = mondemand_util:binaryify (Key), type = I, - value = statset (0, 0, 0, 0, [], Stats) + value = statset (0, 0, 0, 0, [], Stats), + description = Description }; [Entry] -> #md_metric { key = mondemand_util:binaryify (Key), type = I, - value = ets_to_statset (Entry, Stats) + value = ets_to_statset (Entry, Stats), + description = Description } end end. @@ -799,8 +749,6 @@ init([]) -> ]), {ok, #state {}}. -handle_call (get_state, _From, State) -> - {reply, State, State}; handle_call (_Request, _From, State) -> {reply, ok, State}. @@ -1096,112 +1044,114 @@ config_perf_test_ () -> fun cleanup/1, [ % tests using create_counter first - ?_assertEqual (undefined, fetch_counter (my_prog1, my_metric1)), - ?_assertEqual (ok, create_counter (my_prog1, my_metric1)), - ?_assertEqual ({error, already_created}, create_counter (my_prog1, my_metric1)), - ?_assertEqual (0, fetch_counter (my_prog1, my_metric1)), - ?_assertEqual ({ok,1}, increment (my_prog1, my_metric1)), - ?_assertEqual (1, fetch_counter (my_prog1, my_metric1)), - ?_assertEqual ({ok,2}, increment (my_prog1, my_metric1)), - ?_assertEqual ({ok,3}, increment (my_prog1, my_metric1)), - ?_assertEqual ({ok,4}, increment (my_prog1, my_metric1)), - ?_assertEqual (4, fetch_counter (my_prog1, my_metric1)), - ?_assertEqual (true, remove_counter (my_prog1, my_metric1)), - ?_assertEqual (undefined, fetch_counter (my_prog1, my_metric1)), + ?_assertEqual (undefined, fetch_counter (my_prog1, my_metric1,[])), + ?_assertEqual (ok, create_counter (my_prog1, my_metric1,[],"",0)), + ?_assertEqual ({error, already_created}, create_counter (my_prog1, my_metric1,[],"",0)), + ?_assertEqual (0, fetch_counter (my_prog1, my_metric1,[])), + ?_assertEqual ({ok,1}, increment_counter (my_prog1, my_metric1,[],1)), + ?_assertEqual (1, fetch_counter (my_prog1, my_metric1,[])), + ?_assertEqual ({ok,2}, increment_counter (my_prog1, my_metric1,[],1)), + ?_assertEqual ({ok,3}, increment_counter (my_prog1, my_metric1,[],1)), + ?_assertEqual ({ok,4}, increment_counter (my_prog1, my_metric1,[],1)), + ?_assertEqual (4, fetch_counter (my_prog1, my_metric1,[])), + ?_assertEqual (true, remove_counter (my_prog1, my_metric1,[])), + ?_assertEqual (undefined, fetch_counter (my_prog1, my_metric1,[])), % test creation with descriptions - ?_assertEqual (ok, create_counter (my_prog1, my_metric2, "with description")), - ?_assertEqual ({ok,1}, increment (my_prog1, my_metric2)), - ?_assertEqual (1, fetch_counter (my_prog1, my_metric2)), - ?_assertEqual (true, remove_counter (my_prog1, my_metric2)), - ?_assertEqual (undefined, fetch_counter (my_prog1, my_metric2)), + ?_assertEqual (ok, create_counter (my_prog1, my_metric2, [], "with description", 0)), + ?_assertEqual ({ok,1}, increment_counter (my_prog1, my_metric2,[],1)), + ?_assertEqual (1, fetch_counter (my_prog1, my_metric2,[])), + ?_assertEqual (true, remove_counter (my_prog1, my_metric2,[])), + ?_assertEqual (undefined, fetch_counter (my_prog1, my_metric2,[])), % test with contexts and descriptions - ?_assertEqual (ok, create_counter (my_prog1, my_metric3, [{foo,bar}], "with context")), - ?_assertEqual ({ok,1}, increment (my_prog1, my_metric3,[{foo,bar}])), + ?_assertEqual (ok, create_counter (my_prog1, my_metric3, [{foo,bar}], "with context",0)), + ?_assertEqual ({ok,1}, increment_counter (my_prog1, my_metric3,[{foo,bar}],1)), ?_assertEqual (1, fetch_counter (my_prog1, my_metric3,[{foo,bar}])), ?_assertEqual (true, remove_counter (my_prog1, my_metric3,[{foo,bar}])), ?_assertEqual (undefined, fetch_counter (my_prog1, my_metric3,[{foo,bar}])), % test using automatic creation of counters - ?_assertEqual (undefined, fetch_counter (my_prog1, my_metric1)), - ?_assertEqual ({ok,1}, increment (my_prog1, my_metric1)), - ?_assertEqual (1, fetch_counter (my_prog1, my_metric1)), - ?_assertEqual (true, remove_counter (my_prog1, my_metric1)), - ?_assertEqual (undefined, fetch_counter (my_prog1, my_metric1)), + ?_assertEqual (undefined, fetch_counter (my_prog1, my_metric1,[])), + ?_assertEqual ({ok,1}, increment_counter (my_prog1, my_metric1,[],1)), + ?_assertEqual (1, fetch_counter (my_prog1, my_metric1,[])), + ?_assertEqual (true, remove_counter (my_prog1, my_metric1,[])), + ?_assertEqual (undefined, fetch_counter (my_prog1, my_metric1,[])), {"counter wrapping", fun () -> - ?assertEqual ({ok, ?MD_STATS_MAX_METRIC_VALUE - 80}, increment (my_prog1, wrapctr, ?MD_STATS_MAX_METRIC_VALUE - 80)), - ?assertEqual ({ok, ?MD_STATS_MAX_METRIC_VALUE - 1}, increment (my_prog1, wrapctr, 79)), - ?assertEqual ({ok, ?MD_STATS_MAX_METRIC_VALUE}, increment (my_prog1, wrapctr, 1)), - ?assertEqual ({ok, 0}, increment (my_prog1, wrapctr, 1)), - ?assertEqual ({ok, ?MD_STATS_MAX_METRIC_VALUE - 10}, increment (my_prog1, wrapctr, ?MD_STATS_MAX_METRIC_VALUE - 10)), - ?assertEqual ({ok, 3}, increment (my_prog1, wrapctr, 14)), - ?assertEqual ({ok, 0}, increment (my_prog1, wrapctr, -3)), - ?assertEqual ({ok, ?MD_STATS_MIN_METRIC_VALUE}, increment (my_prog1, wrapctr, ?MD_STATS_MIN_METRIC_VALUE)), - ?assertEqual ({ok, 0}, increment (my_prog1, wrapctr, -1)), + ?assertEqual ({ok, ?MD_STATS_MAX_METRIC_VALUE - 80}, increment_counter (my_prog1, wrapctr, [], ?MD_STATS_MAX_METRIC_VALUE - 80)), + ?assertEqual ({ok, ?MD_STATS_MAX_METRIC_VALUE - 1}, increment_counter (my_prog1, wrapctr, [], 79)), + ?assertEqual ({ok, ?MD_STATS_MAX_METRIC_VALUE}, increment_counter (my_prog1, wrapctr, [], 1)), + ?assertEqual ({ok, 0}, increment_counter (my_prog1, wrapctr, [], 1)), + ?assertEqual ({ok, ?MD_STATS_MAX_METRIC_VALUE - 10}, increment_counter (my_prog1, wrapctr, [], ?MD_STATS_MAX_METRIC_VALUE - 10)), + ?assertEqual ({ok, 3}, increment_counter (my_prog1, wrapctr, [], 14)), + ?assertEqual ({ok, 0}, increment_counter (my_prog1, wrapctr, [], -3)), + ?assertEqual ({ok, ?MD_STATS_MIN_METRIC_VALUE}, increment_counter (my_prog1, wrapctr, [], ?MD_STATS_MIN_METRIC_VALUE)), + ?assertEqual ({ok, 0}, increment_counter (my_prog1, wrapctr, [], -1)), % always wrap to zero - ?assertEqual ({ok, ?MD_STATS_MIN_METRIC_VALUE}, increment (my_prog1, wrapctr, ?MD_STATS_MIN_METRIC_VALUE)), - ?assertEqual ({ok, 0}, increment (my_prog1, wrapctr, -10)), - ?assertEqual (true, remove_counter (my_prog1, wrapctr)) + ?assertEqual ({ok, ?MD_STATS_MIN_METRIC_VALUE}, increment_counter (my_prog1, wrapctr, [], ?MD_STATS_MIN_METRIC_VALUE)), + ?assertEqual ({ok, 0}, increment_counter (my_prog1, wrapctr, [], -10)), + ?assertEqual (true, remove_counter (my_prog1, wrapctr, [])) end }, % tests using create_gauge first - ?_assertEqual (undefined, fetch_gauge (my_prog1, my_metric1)), - ?_assertEqual (ok, create_gauge (my_prog1, my_metric1)), - ?_assertEqual ({error, already_created}, create_gauge (my_prog1, my_metric1)), - ?_assertEqual (0, fetch_gauge (my_prog1, my_metric1)), - ?_assertEqual (ok, set (my_prog1, my_metric1, 5)), - ?_assertEqual (5, fetch_gauge (my_prog1, my_metric1)), - ?_assertEqual (ok, set (my_prog1, my_metric1, 6)), - ?_assertEqual (ok, set (my_prog1, my_metric1, 4)), - ?_assertEqual (4, fetch_gauge (my_prog1, my_metric1)), - ?_assertEqual (true, remove_gauge (my_prog1, my_metric1)), - ?_assertEqual (undefined, fetch_gauge (my_prog1, my_metric1)), + ?_assertEqual (undefined, fetch_gauge (my_prog1, my_metric1,[])), + ?_assertEqual (ok, create_gauge (my_prog1, my_metric1,[],"",0)), + ?_assertEqual ({error, already_created}, create_gauge (my_prog1, my_metric1,[],"",0)), + ?_assertEqual (0, fetch_gauge (my_prog1, my_metric1,[])), + ?_assertEqual (ok, set_gauge (my_prog1, my_metric1, [], 5)), + ?_assertEqual (5, fetch_gauge (my_prog1, my_metric1,[])), + ?_assertEqual (ok, set_gauge (my_prog1, my_metric1,[],6)), + ?_assertEqual (ok, set_gauge (my_prog1, my_metric1,[],4)), + ?_assertEqual (4, fetch_gauge (my_prog1, my_metric1,[])), + ?_assertEqual (true, remove_gauge (my_prog1, my_metric1,[])), + ?_assertEqual (undefined, fetch_gauge (my_prog1, my_metric1,[])), {"gcounter", fun () -> ?assertEqual (#md_metric.key, #md_gcounter.key), ?assertEqual (#md_metric.value, #md_gcounter.value), - ?assertEqual (undefined, fetch_gcounter(my_prog1, gctr)), - ?assertEqual ({ok, 1}, gincrement (my_prog1, gctr, [], 1)), - ?assertEqual ({ok, 4}, gincrement (my_prog1, gctr, [], 3)), - ?assertEqual (0, fetch_gcounter(my_prog1, gctr)), + ?assertEqual (undefined, fetch_gcounter(my_prog1, gctr,[])), + ?assertEqual ({ok, 1}, increment_gcounter (my_prog1, gctr, [], 1)), + ?assertEqual ({ok, 4}, increment_gcounter (my_prog1, gctr, [], 3)), + ?assertEqual (0, fetch_gcounter(my_prog1, gctr,[])), Key = calculate_key(my_prog1, [], gcounter, gctr), finalize_metric(Key, ?STATS_TABLE), ?assertMatch (V when is_number(V) andalso V >= 4, - fetch_gcounter(my_prog1, gctr)), + fetch_gcounter(my_prog1, gctr,[])), ?assertMatch (#md_metric{type = gauge, value = V} when is_number(V) andalso V >= 4, lookup_metric(Key, ?STATS_TABLE)), - %% No gincrement in period => rate == 0. + %% No increment_gcounter in period => rate == 0. finalize_metric(Key, ?STATS_TABLE), - ?assertEqual (0, fetch_gcounter(my_prog1, gctr)), + ?assertEqual (0, fetch_gcounter(my_prog1, gctr,[])), %% Test that rate is not negative when counter wraps. - gincrement (my_prog1, gctr, [], ?MD_STATS_MAX_METRIC_VALUE - 80), + increment_gcounter (my_prog1, gctr, [], ?MD_STATS_MAX_METRIC_VALUE - 80), finalize_metric(Key, ?STATS_TABLE), - gincrement (my_prog1, gctr, [], 150), + increment_gcounter (my_prog1, gctr, [], 150), finalize_metric(Key, ?STATS_TABLE), ?assertMatch (V when is_number(V) andalso V > 0, - fetch_gcounter(my_prog1, gctr)) + fetch_gcounter(my_prog1, gctr,[])) end }, % tests using sample sets - ?_assertEqual (undefined, fetch_sample_set (my_prog1, my_metric1)), + ?_assertEqual (undefined, fetch_sample_set (my_prog1, my_metric1,[])), % default size is 10 - ?_assertEqual (ok, create_sample_set (my_prog1, my_metric1)), + ?_assertEqual (ok, create_sample_set (my_prog1, my_metric1,[],"", + mondemand_config:default_max_sample_size(), + mondemand_config:default_stats())), % add some fun () -> [ - ?assertEqual (true, add_sample (my_prog1, my_metric1, N)) + ?assertEqual (true, add_sample (my_prog1, my_metric1, [], N)) || N <- lists:seq (1, 5) ] end, % check their values fun () -> - SS = fetch_sample_set (my_prog1, my_metric1), + SS = fetch_sample_set (my_prog1, my_metric1, []), ?assertEqual (5, mondemand_statsmsg:get_statset (count, SS)), ?assertEqual (15, mondemand_statsmsg:get_statset (sum, SS)), ?assertEqual (1, mondemand_statsmsg:get_statset (min, SS)), @@ -1210,12 +1160,12 @@ config_perf_test_ () -> % add a few more fun () -> [ - ?assertEqual (true, add_sample (my_prog1, my_metric1, N)) + ?assertEqual (true, add_sample (my_prog1, my_metric1,[], N)) || N <- lists:seq (6, 20) ] end, fun () -> - SS = fetch_sample_set (my_prog1, my_metric1), + SS = fetch_sample_set (my_prog1, my_metric1,[]), ?assertEqual (20, mondemand_statsmsg:get_statset (count, SS)), ?assertEqual (lists:sum(lists:seq(1,20)), mondemand_statsmsg:get_statset (sum, SS)), @@ -1227,7 +1177,7 @@ config_perf_test_ () -> Max = mondemand_statsmsg:get_statset (max, SS), ?assertEqual (true, Max > 1) end, - ?_assertEqual (true, remove_sample_set (my_prog1, my_metric1)), + ?_assertEqual (true, remove_sample_set (my_prog1, my_metric1,[])), fun () -> ok = create_sample_set(foo,bar,[],"",100, all_sample_set_stats()), SS = fetch_sample_set(foo,bar,[]), @@ -1237,13 +1187,13 @@ config_perf_test_ () -> % turns out there was a bug with median when there was only one % element in the set, so add one, then check the values are as % we expect - mondemand_statdb:add_sample(foo,bar,[],10), + add_sample(foo,bar,[],10), SS1 = fetch_sample_set(foo,bar,[]), [ ?assertEqual (10, mondemand_statsmsg:get_statset (S, SS1)) || S <- all_sample_set_stats (), S =/= count, S =/= median ], ?assertEqual (1, mondemand_statsmsg:get_statset (count, SS1)), ?assertEqual (0, mondemand_statsmsg:get_statset (median, SS1)), - mondemand_statdb:remove_sample_set(foo,bar,[]), + remove_sample_set(foo,bar,[]), % then for completeness just check that all the values are what % we would expect for 100 samples @@ -1261,7 +1211,7 @@ config_perf_test_ () -> ?assertEqual (95,mondemand_statsmsg:get_statset (pctl_95, SS2)), ?assertEqual (98,mondemand_statsmsg:get_statset (pctl_98, SS2)), ?assertEqual (99,mondemand_statsmsg:get_statset (pctl_99, SS2)), - mondemand_statdb:remove_sample_set(foo,bar,[]) + remove_sample_set(foo,bar,[]) end ] }. diff --git a/src/mondemand_statsmsg.erl b/src/mondemand_statsmsg.erl index e93310e..d3e4d08 100644 --- a/src/mondemand_statsmsg.erl +++ b/src/mondemand_statsmsg.erl @@ -54,7 +54,8 @@ from_lwes/1, statset_from_string/1, statset_to_list/1, - statset_to_string/1 + statset_to_string/1, + to_prometheus/1 ]). % Context is of the form @@ -216,6 +217,95 @@ from_lwes (#lwes_event { attrs = Data}) -> } }. +context_to_prometheus ([]) -> + <<"">>; +context_to_prometheus (Context) -> + [ + <<"{">>, + mondemand_util:join([ [mondemand_util:stringify(K), + <<"=\"">>, mondemand_util:stringify(V), <<"\"">>] + || {K, V} <- lists:sort(Context) ], + <<",">>), + <<"}">> + ]. + +% this function normalizes the name according to the prometheus naming +% rules. These suggest that counters end with _total, so if it's a counter +% we'll first check to see if it already ends with '_total' and if not add +% it to the final name. +normalize_prometheus_name (Type, ProgramId, Key) when is_binary(Key) -> + normalize_prometheus_name (Type, ProgramId, binary_to_list(Key)); +normalize_prometheus_name (Type, ProgramId, Key) -> + case Type of + T when T =:= gauge ; T =:= statset -> [ProgramId, "_", Key]; + counter -> + case lists:reverse(Key) of + [ $l,$a,$t,$o,$t,$_ | _] -> [ProgramId, "_", Key]; + _ -> [ProgramId, "_", Key, "_total"] + end + end. + +metric_to_prometheus (ProgramId, Context, + #md_metric { key = Name, type = counter, + value = Value, description = Description }, + _CollectTime) -> + FinalName = normalize_prometheus_name(counter,ProgramId,Name), + [ <<"# TYPE ">>, FinalName,<<" counter\n">>, + <<"# HELP ">>, FinalName,<<" ">>,Description,<<"\n">>, + FinalName, context_to_prometheus (Context), + <<" ">>, mondemand_util:stringify(Value,<<"0">>),<<"\n">> ]; +metric_to_prometheus (ProgramId, Context, + #md_metric { key = Name, type = gauge, + value = Value, description = Description }, + _CollectTime) -> + FinalName = normalize_prometheus_name(gauge,ProgramId,Name), + [ <<"# TYPE ">>, FinalName,<<" gauge\n">>, + <<"# HELP ">>, FinalName,<<" ">>,Description,<<"\n">>, + FinalName, context_to_prometheus (Context), + <<" ">>, mondemand_util:stringify(Value,<<"0">>),<<"\n">> ]; +metric_to_prometheus (ProgramId, Context, + #md_metric { key = Name, type = statset, + value = StatSet, description = Description }, + CollectTime) -> + FinalName = normalize_prometheus_name(statset,ProgramId,Name), + % use CollectTime in this case as it's a minute ago for statsets + [ <<"# TYPE ">>, FinalName,<<" summary\n">>, + <<"# HELP ">>, FinalName,<<" ">>,Description,<<"\n">>, + FinalName, context_to_prometheus([{<<"quantile">>,<<"0.5">>} | Context]), + <<" ">>,mondemand_util:stringify(StatSet#md_statset.median,<<"0">>), + <<" ">>,mondemand_util:stringify(CollectTime),<<"\n">>, + FinalName, context_to_prometheus([{<<"quantile">>,<<"0.75">>} | Context]), + <<" ">>,mondemand_util:stringify(StatSet#md_statset.pctl_75,<<"0">>), + <<" ">>,mondemand_util:stringify(CollectTime),<<"\n">>, + FinalName, context_to_prometheus([{<<"quantile">>,<<"0.90">>} | Context]), + <<" ">>,mondemand_util:stringify(StatSet#md_statset.pctl_90,<<"0">>), + <<" ">>,mondemand_util:stringify(CollectTime),<<"\n">>, + FinalName, context_to_prometheus([{<<"quantile">>,<<"0.95">>} | Context]), + <<" ">>,mondemand_util:stringify(StatSet#md_statset.pctl_95,<<"0">>), + <<" ">>,mondemand_util:stringify(CollectTime),<<"\n">>, + FinalName, context_to_prometheus([{<<"quantile">>,<<"0.98">>} | Context]), + <<" ">>,mondemand_util:stringify(StatSet#md_statset.pctl_98,<<"0">>), + <<" ">>,mondemand_util:stringify(CollectTime),<<"\n">>, + FinalName, context_to_prometheus([{<<"quantile">>,<<"0.99">>} | Context]), + <<" ">>,mondemand_util:stringify(StatSet#md_statset.pctl_99,<<"0">>), + <<" ">>,mondemand_util:stringify(CollectTime),<<"\n">>, + FinalName, context_to_prometheus([{<<"quantile">>,<<"1.0">>} | Context]), + <<" ">>,mondemand_util:stringify(StatSet#md_statset.max,<<"0">>), + <<" ">>,mondemand_util:stringify(CollectTime),<<"\n">>, + FinalName,<<"_sum">>,context_to_prometheus(Context), + <<" ">>,mondemand_util:stringify(StatSet#md_statset.sum,<<"0">>), + <<" ">>,mondemand_util:stringify(CollectTime),<<"\n">>, + FinalName,<<"_count">>,context_to_prometheus(Context), + <<" ">>,mondemand_util:stringify(StatSet#md_statset.count,<<"0">>), + <<" ">>,mondemand_util:stringify(CollectTime),<<"\n">> + ]. + +to_prometheus (#md_stats_msg{ prog_id = ProgramId, + collect_time = CollectTime, + context = Context, + metrics = Metrics }) -> + [ metric_to_prometheus(ProgramId, Context, Metric, CollectTime) || Metric <- Metrics ]. + to_lwes (L) when is_list (L) -> MaxMetrics = mondemand_config:max_metrics (), lists:flatten (lists:map (fun (X) -> to_lwes (X, MaxMetrics) end, L) ); @@ -282,6 +372,7 @@ to_lwes (MondemandStatsMsg = #md_stats_msg { to_lwes (MondemandStatsMsg = #md_stats_msg {}, _, EventList) -> [ to_lwes (MondemandStatsMsg) | EventList ]. + metric_to_lwes (MetricIndex, #md_metric { key = Name, type = statset, value = Value }) -> [ { ?LWES_STRING, @@ -2125,14 +2216,14 @@ statsmsg_test_ () -> % test that to_lwes of long statsmsg returns several events that are correct [E1, E2] = to_lwes(S), % test that the metric counts and first metric in events match expected values - ?assert (lists:member({?LWES_U_INT_16, <<"num">>, MaxMetrics}, E1#lwes_event.attrs)), - ?assert (lists:member({?LWES_STRING, <<"k0">>, <<"baz1">>}, E1#lwes_event.attrs)), - ?assert (lists:member({?LWES_STRING, <<"t0">>, <<"gauge">>}, E1#lwes_event.attrs)), - ?assert (lists:member({?LWES_INT_64, <<"v0">>, 1}, E1#lwes_event.attrs)), - ?assert (lists:member({?LWES_U_INT_16, <<"num">>, 1}, E2#lwes_event.attrs)), - ?assert (lists:member({?LWES_STRING, <<"k0">>, E2K0Name}, E2#lwes_event.attrs)), - ?assert (lists:member({?LWES_STRING, <<"t0">>, <<"gauge">>}, E2#lwes_event.attrs)), - ?assert (lists:member({?LWES_INT_64, <<"v0">>, MetricsCount}, E2#lwes_event.attrs)), + ?assertEqual (true,lists:member({?LWES_U_INT_16, <<"num">>, MaxMetrics}, E1#lwes_event.attrs)), + ?assertEqual (true,lists:member({?LWES_STRING, <<"k0">>, <<"baz1">>}, E1#lwes_event.attrs)), + ?assertEqual (true,lists:member({?LWES_STRING, <<"t0">>, <<"gauge">>}, E1#lwes_event.attrs)), + ?assertEqual (true,lists:member({?LWES_INT_64, <<"v0">>, 1}, E1#lwes_event.attrs)), + ?assertEqual (true,lists:member({?LWES_U_INT_16, <<"num">>, 1}, E2#lwes_event.attrs)), + ?assertEqual (true,lists:member({?LWES_STRING, <<"k0">>, E2K0Name}, E2#lwes_event.attrs)), + ?assertEqual (true,lists:member({?LWES_STRING, <<"t0">>, <<"gauge">>}, E2#lwes_event.attrs)), + ?assertEqual (true,lists:member({?LWES_INT_64, <<"v0">>, MetricsCount}, E2#lwes_event.attrs)), % test that the prog_id and contexts match in the two lwes events {0, S1} = from_lwes ( diff --git a/src/mondemand_sup.erl b/src/mondemand_sup.erl index 428227a..8bf55fb 100644 --- a/src/mondemand_sup.erl +++ b/src/mondemand_sup.erl @@ -37,8 +37,29 @@ init([]) -> [] end, + {ok, Cwd} = file:get_cwd(), + HTTPDChild = + case mondemand_config:httpd_enabled() of + true -> + [{mondemand_httpd, + {inets, start, + [httpd, + [{port, mondemand_config:httpd_port()}, + {bind_address, mondemand_config:httpd_address()}, + {server_name,"md"}, + {server_root, Cwd}, + {document_root, Cwd}, + {modules, [mondemand_httpd]} + ] + ] + }, permanent, 5000, worker, [mondemand_httpd] + } + ]; + false -> [] + end, + ServiceChildren = - VMStatsChild ++ + VMStatsChild ++ HTTPDChild ++ [ { mondemand_statdb, {mondemand_statdb, start_link, []}, diff --git a/src/mondemand_util.erl b/src/mondemand_util.erl index 738f1fa..f6b6a74 100644 --- a/src/mondemand_util.erl +++ b/src/mondemand_util.erl @@ -16,8 +16,10 @@ -export ([ find_in_dict/2, find_in_dict/3, binaryify/1, + binaryify/2, binaryify_context/1, stringify/1, + stringify/2, integerify/1, floatify/1, join/2 @@ -222,6 +224,11 @@ millis_to_next_round_minute (Ts) -> MillisSinceEpoch = now_to_epoch_millis (Ts), NextMinuteSinceEpochAsMillis - MillisSinceEpoch. +binaryify (undefined, Default) when is_binary(Default) -> + Default; +binaryify (B, _) -> + binaryify (B). + binaryify (B) when is_binary (B) -> B; binaryify (O) -> @@ -230,6 +237,11 @@ binaryify (O) -> binaryify_context (Context) -> [ {binaryify (K), binaryify (V)} || {K,V} <- Context]. +stringify (undefined, Default) when is_list(Default) -> + Default; +stringify (V, _) -> + stringify(V). + stringify (I) when is_integer (I) -> integer_to_list (I); stringify (F) when is_float (F) -> From b779b60f1b6bb239fd303ffe5c7825ecb45f929d Mon Sep 17 00:00:00 2001 From: Anthony Molinaro Date: Tue, 2 Jul 2019 21:09:31 +0000 Subject: [PATCH 2/4] support passing of description with raw stats sending --- include/mondemand.hrl | 2 +- src/mondemand_statsmsg.erl | 27 ++++++++++++++++++--------- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/include/mondemand.hrl b/include/mondemand.hrl index e751aa5..fa2ad64 100644 --- a/include/mondemand.hrl +++ b/include/mondemand.hrl @@ -53,7 +53,7 @@ -record (md_metric, { type, key, value, - description + description = "" }). -record (md_statset, { count, sum, diff --git a/src/mondemand_statsmsg.erl b/src/mondemand_statsmsg.erl index d3e4d08..b7d2163 100644 --- a/src/mondemand_statsmsg.erl +++ b/src/mondemand_statsmsg.erl @@ -72,11 +72,6 @@ new (ProgId, Context, Metrics, Host) -> new (ProgId, Context, Metrics, Host, undefined). new (ProgId, Context, Metrics, Host, CollectTime) -> new (ProgId, Context, Metrics, Host, CollectTime, undefined). -new (ProgId, Context, Metrics = [{_,_,_}|_], Host, CollectTime, SendTime) -> - ValidatedMetrics = [ #md_metric { type = T, key = K, value = V } - || { T, K, V } - <- Metrics ], - new (ProgId, Context, ValidatedMetrics, Host, CollectTime, SendTime); new (ProgId, Context, Metrics = [#md_metric{}|_], Host, CollectTime, SendTime) -> #md_stats_msg { collect_time = CollectTime, @@ -87,7 +82,15 @@ new (ProgId, Context, Metrics = [#md_metric{}|_], context = Context, num_metrics = length (Metrics), metrics = Metrics - }. + }; +new (ProgId, Context, Metrics = [M|_], Host, CollectTime, SendTime) + when is_tuple(M) -> + ValidatedMetrics = + lists:map( + fun({T,K,V}) -> #md_metric { type = T, key = K, value = V }; + ({T,K,V,D}) -> #md_metric { type = T, key = K, value = V, description = D } + end, Metrics), + new (ProgId, Context, ValidatedMetrics, Host, CollectTime, SendTime). new_statset () -> #md_statset {}. @@ -237,11 +240,17 @@ normalize_prometheus_name (Type, ProgramId, Key) when is_binary(Key) -> normalize_prometheus_name (Type, ProgramId, binary_to_list(Key)); normalize_prometheus_name (Type, ProgramId, Key) -> case Type of - T when T =:= gauge ; T =:= statset -> [ProgramId, "_", Key]; + T when T =:= gauge ; T =:= statset -> + [mondemand_util:stringify(ProgramId), + <<"_">>, mondemand_util:stringify(Key)]; counter -> case lists:reverse(Key) of - [ $l,$a,$t,$o,$t,$_ | _] -> [ProgramId, "_", Key]; - _ -> [ProgramId, "_", Key, "_total"] + [ $l,$a,$t,$o,$t,$_ | _] -> + [mondemand_util:stringify(ProgramId), + <<"_">>, mondemand_util:stringify(Key)]; + _ -> + [mondemand_util:stringify(ProgramId), + <<"_">>, mondemand_util:stringify(Key),<<"_total">>] end end. From 5a04e3e4ed236fc0e0284988395d3ee2bacfcbcd Mon Sep 17 00:00:00 2001 From: Anthony Molinaro Date: Tue, 2 Jul 2019 21:36:33 +0000 Subject: [PATCH 3/4] allow stats only to be disabled from sending to lwes --- src/mondemand.erl | 23 ++++++++++++++++------- src/mondemand_config.erl | 20 ++++++++++++++++++-- 2 files changed, 34 insertions(+), 9 deletions(-) diff --git a/src/mondemand.erl b/src/mondemand.erl index 265b95e..bc17961 100644 --- a/src/mondemand.erl +++ b/src/mondemand.erl @@ -460,11 +460,14 @@ send_annotation (Id, Time, Description, Text, Tags, Context) -> send_stats (_, _, []) -> ok; send_stats (ProgId, Context, Stats) -> - Event = - mondemand_statsmsg:to_lwes ( - mondemand_statsmsg:new (ProgId, Context, Stats) - ), - send_event (Event). + StatsMsg = mondemand_statsmsg:new (ProgId, Context, Stats), + case not mondemand_config:lwes_stats_disabled() of + true -> + Event = mondemand_statsmsg:to_lwes (StatsMsg), + send_event (Event); + false -> + ok + end. flush ({FlushModule, FlushStatePrepFunction, FlushFunction}) -> case mondemand_config:vmstats_prog_id () of @@ -645,10 +648,16 @@ send_event (Event = #lwes_event { name = Name }) -> gen_server:cast (?MODULE, {send, Name, lwes_event:to_binary (Event)}). flush_state_init () -> - ok. + % for flushing keep track of if stats are enabled and use that as the state + case not mondemand_config:lwes_stats_disabled() of + true -> true; + false -> false + end. -flush_one_stat (StatsMsg, State) -> +flush_one_stat (StatsMsg, true) -> send_event (mondemand_statsmsg:to_lwes (StatsMsg)), + true; +flush_one_stat (_, State) -> State. open_all (Config) -> diff --git a/src/mondemand_config.erl b/src/mondemand_config.erl index 91d9413..1cace38 100644 --- a/src/mondemand_config.erl +++ b/src/mondemand_config.erl @@ -24,6 +24,7 @@ default_stats/0, send_interval/0, lwes_config/0, + lwes_stats_disabled/0, minutes_to_keep/0, max_metrics/0, parse_config/1, @@ -51,6 +52,7 @@ -define (MOCHI_SENDER_HOST, mondemand_sender_host_global). -define (MOCHI_MAX_METRICS, mondemand_max_metrics_global). +-define (MOCHI_LWES_STATS_DISABLED, mondemand_lwes_stats_disabled). % this function is meant to be called before the supervisor and % pulls all those configs which are mostly static. @@ -66,11 +68,18 @@ init () -> undefined -> ?DEFAULT_MAX_METRICS; {ok, M} -> M end, - mondemand_global:put (?MOCHI_MAX_METRICS, MaxMetrics). + mondemand_global:put (?MOCHI_MAX_METRICS, MaxMetrics), + LwesStatsDisabled = + case application:get_env (mondemand, lwes_stats_disabled) of + {ok, B} when is_boolean (B) -> B; + _ -> false + end, + mondemand_global:put (?MOCHI_LWES_STATS_DISABLED, LwesStatsDisabled). clear() -> mondemand_global:delete(?MOCHI_SENDER_HOST), - mondemand_global:delete(?MOCHI_MAX_METRICS). + mondemand_global:delete(?MOCHI_MAX_METRICS), + mondemand_global:delete(?MOCHI_LWES_STATS_DISABLED). host () -> mondemand_global:get (?MOCHI_SENDER_HOST). @@ -132,6 +141,13 @@ lwes_config () -> end end. +% If you are using the prometheus exporter you probably don't want +% to send lwes stats, but it's pretty baked in at the moment, so this +% will be a simple way to just turn off emission of stats but otherwise +% continue to function as normal +lwes_stats_disabled() -> + mondemand_global:get (?MOCHI_LWES_STATS_DISABLED). + valid_config (Config) when is_list (Config) -> lists:foldl (fun (T, A) -> lists:keymember (T, 1, Config) andalso A From 8fad7e06f9e049c5010553017c49cdc90c6ab201 Mon Sep 17 00:00:00 2001 From: Anthony Molinaro Date: Tue, 2 Jul 2019 23:56:01 +0000 Subject: [PATCH 4/4] keep raw sends around, use collect times for all metrics --- include/mondemand.hrl | 3 ++- mondemand_dev.config | 7 ++--- src/mondemand.erl | 24 ++++++++++++----- src/mondemand_statdb.erl | 44 ++++++++++++++++++++++++++++--- src/mondemand_statsmsg.erl | 53 ++++++++++++++++++++++++++------------ src/mondemand_util.erl | 13 +++++++++- 6 files changed, 113 insertions(+), 31 deletions(-) diff --git a/include/mondemand.hrl b/include/mondemand.hrl index fa2ad64..81e215d 100644 --- a/include/mondemand.hrl +++ b/include/mondemand.hrl @@ -53,7 +53,8 @@ -record (md_metric, { type, key, value, - description = "" + description = "", + collect_time = undefined }). -record (md_statset, { count, sum, diff --git a/mondemand_dev.config b/mondemand_dev.config index 16508d2..f9a8dfc 100644 --- a/mondemand_dev.config +++ b/mondemand_dev.config @@ -4,9 +4,10 @@ { lwes_channel, { 1, [ { "127.0.0.1", 21512 } ] } } % { lwes_channel, { 1, [ { "127.0.0.1", 20602 } ] } }, % { lwes_channel, { 2, [{"127.0.0.1",20602}, {"172.16.107.128", 20602}] } }, -% { config_file, "mondemand.conf" }, -% , { vmstats, [ { program_id, mondemand_erlang } ] } -% { send_interval, 5 } +% , { config_file, "mondemand.conf" }, + , { vmstats, [ { program_id, erlang_vm } ] } +% , { lwes_stats_disabled, true } +% , { send_interval, 5 } , { httpd, [ {port, 8082} ] } ] } diff --git a/src/mondemand.erl b/src/mondemand.erl index bc17961..d924ca9 100644 --- a/src/mondemand.erl +++ b/src/mondemand.erl @@ -300,12 +300,21 @@ export_as_prometheus () -> % we export a 'minute ago' which actually just effects statsets. This uses % the previous minutes statset db which will be complete and will export that % time as well - mondemand_statdb:map_then ( - fun (S,A) -> - [mondemand_statsmsg:to_prometheus(S) | A] - end, - [], - 1). + Stats = + mondemand_statdb:map_then ( + fun (S,A) -> + [mondemand_statsmsg:to_prometheus(S) | A] + end, + [], + 1), + Raw = + mondemand_statdb:foldl_raw ( + fun (S, A) -> + SM = mondemand_statsmsg:to_prometheus(S), + [SM | A] + end, + Stats), + Raw. trace_info_in_dict (Dict) -> trace_owner_in_dict (Dict) andalso trace_id_in_dict (Dict). @@ -461,6 +470,9 @@ send_stats (_, _, []) -> ok; send_stats (ProgId, Context, Stats) -> StatsMsg = mondemand_statsmsg:new (ProgId, Context, Stats), + % always add to raw cache + mondemand_statdb:add_raw(ProgId, Context, StatsMsg), + % only send to lwes if not disabled case not mondemand_config:lwes_stats_disabled() of true -> Event = mondemand_statsmsg:to_lwes (StatsMsg), diff --git a/src/mondemand_statdb.erl b/src/mondemand_statdb.erl index 0b1a0d4..adb1db4 100644 --- a/src/mondemand_statdb.erl +++ b/src/mondemand_statdb.erl @@ -78,6 +78,8 @@ map_then/3, map/4, + add_raw/3, % (ProgId, Context, StatsMsg) + foldl_raw/2, % (Function/2, Accumulator0) -> AccumulatorN description/1, flush/3, config/0, @@ -105,6 +107,18 @@ max_sample_size, statistics }). + +% The raw db is used when raw stats are sent via mondemand:send_stats/3. +% The key is the prog_id and the context list as that should be the same +% from one call to the next. The values kept are the collect_time which +% will be the time mondemand:send_stats/3 is called, and the +-record (raw_key, {prog_id, + context = [] + }). +-record (raw_entry, {key, statsmsg}). +-define (RAW_KEY_INDEX, #raw_entry.key). +-define (RAW_TABLE, md_raw). + -record (map_state, {host, collect_time, stats_set_table, user_state}). -define (STATS_TABLE, md_stats). @@ -614,17 +628,20 @@ lookup_metric (InternalKey = #mdkey {type = Type, key = Key}, #md_metric { key = mondemand_util:binaryify (Key), type = I, value = 0, - description = Description }; + description = Description, + collect_time = mondemand_util:millis_since_epoch() }; [#md_metric {value = V}] -> #md_metric { key = mondemand_util:binaryify (Key), type = I, value = V, - description = Description }; + description = Description, + collect_time = mondemand_util:millis_since_epoch() }; [#md_gcounter {rate = V}] -> #md_metric { key = mondemand_util:binaryify (Key), type = gauge, value = V, - description = Description } + description = Description, + collect_time = mondemand_util:millis_since_epoch() } end; I when I =:= statset -> #config { statistics = Stats, description = Description } @@ -704,6 +721,19 @@ all () -> State end). +add_raw (ProgId, Context, StatsMsg) -> + CollectTime = mondemand_util:millis_since_epoch(), + Key = #raw_key {prog_id = ProgId, context = Context}, + StatsMsgWithTime = mondemand_statsmsg:collect_time(StatsMsg,CollectTime), + ets:insert (?RAW_TABLE, #raw_entry{key = Key, statsmsg = StatsMsgWithTime}). + +foldl_raw (Function, InitialState) -> + ets:foldl(fun(#raw_entry {statsmsg = SM}, A) -> + Function(SM, A) + end, + InitialState, + ?RAW_TABLE). + %-=====================================================================- %- gen_server callbacks - %-=====================================================================- @@ -747,6 +777,14 @@ init([]) -> {read_concurrency, false}, {keypos, ?METRIC_KEY_INDEX} ]), + % also a table to keep track of raw stats passed to mondemand + ets:new (?RAW_TABLE, [ set, + public, + named_table, + {write_concurrency, true}, + {read_concurrency, false}, + {keypos, ?RAW_KEY_INDEX} + ]), {ok, #state {}}. handle_call (_Request, _From, State) -> diff --git a/src/mondemand_statsmsg.erl b/src/mondemand_statsmsg.erl index b7d2163..409374a 100644 --- a/src/mondemand_statsmsg.erl +++ b/src/mondemand_statsmsg.erl @@ -38,6 +38,7 @@ prog_id/1, host/1, collect_time/1, + collect_time/2, send_time/1, context/1, context_value/2, @@ -254,59 +255,76 @@ normalize_prometheus_name (Type, ProgramId, Key) -> end end. +determine_collect_time (ListOfTimes) when is_list(ListOfTimes) -> + case mondemand_util:first_defined(ListOfTimes) of + undefined -> mondemand_util:millis_since_epoch(); + T -> T + end. + metric_to_prometheus (ProgramId, Context, #md_metric { key = Name, type = counter, - value = Value, description = Description }, - _CollectTime) -> + value = Value, description = Description, + collect_time = MetricCollectTime }, + MsgCollectTime) -> FinalName = normalize_prometheus_name(counter,ProgramId,Name), [ <<"# TYPE ">>, FinalName,<<" counter\n">>, <<"# HELP ">>, FinalName,<<" ">>,Description,<<"\n">>, FinalName, context_to_prometheus (Context), - <<" ">>, mondemand_util:stringify(Value,<<"0">>),<<"\n">> ]; + <<" ">>, mondemand_util:stringify(Value,<<"0">>), + <<" ">>,mondemand_util:stringify( + determine_collect_time([MetricCollectTime, MsgCollectTime])), + <<"\n">> ]; metric_to_prometheus (ProgramId, Context, #md_metric { key = Name, type = gauge, - value = Value, description = Description }, - _CollectTime) -> + value = Value, description = Description, + collect_time = MetricCollectTime }, + MsgCollectTime) -> FinalName = normalize_prometheus_name(gauge,ProgramId,Name), [ <<"# TYPE ">>, FinalName,<<" gauge\n">>, <<"# HELP ">>, FinalName,<<" ">>,Description,<<"\n">>, FinalName, context_to_prometheus (Context), - <<" ">>, mondemand_util:stringify(Value,<<"0">>),<<"\n">> ]; + <<" ">>, mondemand_util:stringify(Value,<<"0">>), + <<" ">>,mondemand_util:stringify( + determine_collect_time([MetricCollectTime, MsgCollectTime])), + <<"\n">> ]; metric_to_prometheus (ProgramId, Context, #md_metric { key = Name, type = statset, - value = StatSet, description = Description }, - CollectTime) -> + value = StatSet, description = Description, + collect_time = MetricCollectTime }, + MsgCollectTime) -> FinalName = normalize_prometheus_name(statset,ProgramId,Name), + CollectTime = mondemand_util:stringify( + determine_collect_time([MetricCollectTime, MsgCollectTime])), % use CollectTime in this case as it's a minute ago for statsets [ <<"# TYPE ">>, FinalName,<<" summary\n">>, <<"# HELP ">>, FinalName,<<" ">>,Description,<<"\n">>, FinalName, context_to_prometheus([{<<"quantile">>,<<"0.5">>} | Context]), <<" ">>,mondemand_util:stringify(StatSet#md_statset.median,<<"0">>), - <<" ">>,mondemand_util:stringify(CollectTime),<<"\n">>, + <<" ">>,CollectTime,<<"\n">>, FinalName, context_to_prometheus([{<<"quantile">>,<<"0.75">>} | Context]), <<" ">>,mondemand_util:stringify(StatSet#md_statset.pctl_75,<<"0">>), - <<" ">>,mondemand_util:stringify(CollectTime),<<"\n">>, + <<" ">>,CollectTime,<<"\n">>, FinalName, context_to_prometheus([{<<"quantile">>,<<"0.90">>} | Context]), <<" ">>,mondemand_util:stringify(StatSet#md_statset.pctl_90,<<"0">>), - <<" ">>,mondemand_util:stringify(CollectTime),<<"\n">>, + <<" ">>,CollectTime,<<"\n">>, FinalName, context_to_prometheus([{<<"quantile">>,<<"0.95">>} | Context]), <<" ">>,mondemand_util:stringify(StatSet#md_statset.pctl_95,<<"0">>), - <<" ">>,mondemand_util:stringify(CollectTime),<<"\n">>, + <<" ">>,CollectTime,<<"\n">>, FinalName, context_to_prometheus([{<<"quantile">>,<<"0.98">>} | Context]), <<" ">>,mondemand_util:stringify(StatSet#md_statset.pctl_98,<<"0">>), - <<" ">>,mondemand_util:stringify(CollectTime),<<"\n">>, + <<" ">>,CollectTime,<<"\n">>, FinalName, context_to_prometheus([{<<"quantile">>,<<"0.99">>} | Context]), <<" ">>,mondemand_util:stringify(StatSet#md_statset.pctl_99,<<"0">>), - <<" ">>,mondemand_util:stringify(CollectTime),<<"\n">>, + <<" ">>,CollectTime,<<"\n">>, FinalName, context_to_prometheus([{<<"quantile">>,<<"1.0">>} | Context]), <<" ">>,mondemand_util:stringify(StatSet#md_statset.max,<<"0">>), - <<" ">>,mondemand_util:stringify(CollectTime),<<"\n">>, + <<" ">>,CollectTime,<<"\n">>, FinalName,<<"_sum">>,context_to_prometheus(Context), <<" ">>,mondemand_util:stringify(StatSet#md_statset.sum,<<"0">>), - <<" ">>,mondemand_util:stringify(CollectTime),<<"\n">>, + <<" ">>,CollectTime,<<"\n">>, FinalName,<<"_count">>,context_to_prometheus(Context), <<" ">>,mondemand_util:stringify(StatSet#md_statset.count,<<"0">>), - <<" ">>,mondemand_util:stringify(CollectTime),<<"\n">> + <<" ">>,CollectTime,<<"\n">> ]. to_prometheus (#md_stats_msg{ prog_id = ProgramId, @@ -416,6 +434,7 @@ prog_id (#md_stats_msg { prog_id = ProgId }) -> ProgId. host (#md_stats_msg { host = Host }) -> Host. collect_time (#md_stats_msg { collect_time = CollectTime }) -> CollectTime. +collect_time (M = #md_stats_msg {}, CollectTime) -> M#md_stats_msg { collect_time = CollectTime }. send_time (#md_stats_msg { send_time = SendTime }) -> SendTime. context (#md_stats_msg { context = Context }) -> Context. diff --git a/src/mondemand_util.erl b/src/mondemand_util.erl index f6b6a74..4f57acb 100644 --- a/src/mondemand_util.erl +++ b/src/mondemand_util.erl @@ -22,7 +22,8 @@ stringify/2, integerify/1, floatify/1, - join/2 + join/2, + first_defined/1 ]). %% Time functions -export ([ @@ -308,6 +309,16 @@ join ([H|T], S, []) -> join ([H|T], S, A) -> join (T,S,[H,S|A]). +first_defined(L) when is_list(L) -> + first_defined0(L). + +first_defined0([undefined|R]) -> + first_defined0(R); +first_defined0([D|_]) -> + D; +first_defined0([]) -> + undefined. + normalize_ip (undefined) -> undefined; normalize_ip (IP = {_,_,_,_}) -> IP;