diff --git a/src/telemetry_poller.erl b/src/telemetry_poller.erl index 94156cd..a8c4723 100644 --- a/src/telemetry_poller.erl +++ b/src/telemetry_poller.erl @@ -1,187 +1,274 @@ -%%%--------------------------------------------------- -%% @doc -%% A time-based poller to periodically dispatch Telemetry events. -%% -%% A poller is a process start in your supervision tree with a list -%% of measurements to perform periodically. On start it expects the -%% period in milliseconds and a list of measurements to perform. Initial delay -%% is an optional parameter that sets time delay in milliseconds before starting -%% measurements: -%% -%% ``` -%% telemetry_poller:start_link([ -%% {measurements, Measurements}, -%% {period, Period}, -%% {init_delay, InitDelay} -%% ]) -%% ''' -%% -%% The following measurements are supported: -%% -%% * `memory' (default) -%% -%% * `total_run_queue_lengths' (default) -%% -%% * `system_counts' (default) -%% -%% * `{process_info, Proplist}' -%% -%% * `{Module, Function, Args}' -%% -%% We will discuss each measurement in detail. Also note that the -%% telemetry_poller application ships with a built-in poller that -%% measures `memory', `total_run_queue_lengths' and `system_counts'. This takes -%% the VM measurement out of the way so your application can focus -%% on what is specific to its behaviour. -%% -%% == Memory == -%% -%% An event emitted as `[vm, memory]'. The measurement includes all -%% the key-value pairs returned by {@link erlang:memory/0} function, -%% e.g. `total' for total memory, `processes_used' for memory used by -%% all processes, etc. -%% -%% == Total run queue lengths == -%% -%% On startup, the Erlang VM starts many schedulers to do both IO and -%% CPU work. If a process needs to do some work or wait on IO, it is -%% allocated to the appropriate scheduler. The run queue is a queue of -%% tasks to be scheduled. A length of a run queue corresponds to the amount -%% of work accumulated in the system. 
If a run queue length is constantly -%% growing, it means that the BEAM is not keeping up with executing all -%% the tasks. -%% -%% There are several run queue types in the Erlang VM. Each CPU scheduler -%% (usually one per core) has its own run queue, and since Erlang 20.0 there -%% is one dirty CPU run queue, and one dirty IO run queue. -%% -%% The run queue length event is emitted as `[vm, total_run_queue_lengths]'. -%% The event contains no metadata and three measurements: -%% -%%
Note: the other solution would be to dispatch two different events by hooking up -%% `example_app:regular_users_session_count/0' and `example_app:admin_users_session_count/0' -%% functions directly. However, if you add more and more user roles to your app, you'll find -%% yourself creating a new event for each one of them, which will force you to modify existing -%% event handlers. If you can break down event value by some feature, like user role in this -%% example, it's usually better to use event metadata than add new events. -%%-%% -%% This is a perfect use case for poller, because you don't need to write a dedicated process -%% which would call these functions periodically. Additionally, if you find that you need to collect -%% more statistics like this in the future, you can easily hook them up to the same poller process -%% and avoid creating lots of processes which would stay idle most of the time. -%% @end -%%%--------------------------------------------------- +-if(?OTP_RELEASE >= 27). +-define(MODULEDOC(Str), -moduledoc(Str)). +-define(DOC(Str), -doc(Str)). +-else. +-define(MODULEDOC(Str), -compile([])). +-define(DOC(Str), -compile([])). +-endif. + -module(telemetry_poller). +?MODULEDOC(""" +A time-based poller to periodically dispatch Telemetry events. + +A poller is a process started in your supervision tree with a list +of measurements to perform periodically. On start it expects the +period in milliseconds and a list of measurements to perform.
Initial delay +is an optional parameter that sets time delay in milliseconds before starting +measurements: + + + +### Erlang + +``` +telemetry_poller:start_link([ + {measurements, Measurements}, + {period, Period}, + {init_delay, InitDelay} +]) +``` + +### Elixir + +``` +:telemetry_poller.start_link( + measurements: measurements, + period: period, + init_delay: init_delay +) +``` + + + +## Measurements + +The following measurements are supported: + + * `memory` (default) + * `total_run_queue_lengths` (default) + * `system_counts` (default) + * `{process_info, Proplist}` + * `{Module, Function, Args}` + +We will discuss each measurement in detail. Also note that the +`telemetry_poller` application ships with a built-in poller that +measures `memory`, `total_run_queue_lengths` and `system_counts`. This takes +the VM measurement out of the way so your application can focus +on what is specific to its behaviour. + +### Memory + +An event emitted as `[vm, memory]`. The measurement includes all +the key-value pairs returned by the `erlang:memory/0` function, +e.g. `total` for total memory, `processes_used` for memory used by +all processes, and so on. + +### Total run queue lengths + +On startup, the Erlang VM starts many schedulers to do both IO and +CPU work. If a process needs to do some work or wait on IO, it is +allocated to the appropriate scheduler. The measurement includes the +following keys: + + * `total` - all schedulers (CPU + IO) + * `cpu` - CPU schedulers + * `io` - IO schedulers + +### System counts + +The measurement includes: + + * `process_count` - the number of processes currently existing at the local node + * `atom_count` - the number of atoms currently existing at the local node + * `port_count` - the number of ports currently existing at the local node + +### Process info + +A measurement with information about a given process. 
It must be specified +alongside a proplist with the process name, the event name, and a list of +keys to be included: + + + +### Erlang + +```erlang +{process_info, [ + {name, my_app_worker}, + {event, [my_app, worker]}, + {keys, [message_queue_len, memory]} +]} +``` + +### Elixir + +```elixir +{:process_info, [ + name: :my_app_worker, + event: [:my_app, :worker], + keys: [:message_queue_len, :memory] +]} +``` + + + +### Custom measurements + +Telemetry poller also allows you to perform custom measurements by passing +a module-function-args tuple: + + + +### Erlang + +```erlang +{my_app_example, measure, []} +``` + +### Elixir + +```elixir +{MyApp.Example, :measure, []} +``` + + + +The given function will be invoked periodically and it must explicitly invoke the +`telemetry:execute/3` function. If the invocation of the MFA fails, +the measurement is removed from the Poller. + +For all options, see `start_link/1`. The options listed there can be given +to the default poller as well as to custom pollers. + +### Default poller + +A default poller is started with `telemetry_poller` responsible for emitting +measurements for `memory`, `total_run_queue_lengths` and `system_counts`. You can customize +the behaviour of the default poller by setting the `default` key under the +`telemetry_poller` application environment. Setting it to `false` disables +the poller. + +## Examples + +### Example 1: tracking number of active sessions in web application + +Let's imagine that you have a web application and you would like to periodically +measure number of active user sessions. + + + +### Erlang + +```erlang +-module(example_app). + +session_count() -> + % logic for calculating session count. +``` + +### Elixir + +```elixir +defmodule ExampleApp do + def session_count do + # logic for calculating session count + end +end +``` + + + +To achieve that, we need a measurement dispatching the value we're interested in: + + + +### Erlang + +```erlang +-module(example_app_measurements).
+ +dispatch_session_count() -> + telemetry:execute([example_app, session_count], example_app:session_count()). +``` + +### Elixir + +```elixir +defmodule ExampleApp.Measurements do + def dispatch_session_count do + :telemetry.execute([:example_app, :session_count], ExampleApp.session_count()) + end +end +``` + + + +and tell the Poller to invoke it periodically: + + + +### Erlang + +```erlang +telemetry_poller:start_link([{measurements, [{example_app_measurements, dispatch_session_count, []}]}]). +``` + +### Elixir + +```elixir +:telemetry_poller.start_link(measurements: [{ExampleApp.Measurements, :dispatch_session_count, []}]) +``` + + + +If you find that you need to somehow label the event values, e.g. differentiate between number of +sessions of regular and admin users, you could use event metadata: + + + +### Erlang + +```erlang +-module(example_app_measurements). + +dispatch_session_count() -> + Regulars = example_app:regular_users_session_count(), + Admins = example_app:admin_users_session_count(), + telemetry:execute([example_app, session_count], #{count => Admins}, #{role => admin}), + telemetry:execute([example_app, session_count], #{count => Regulars}, #{role => regular}). +``` + +### Elixir + +```elixir +defmodule ExampleApp.Measurements do + def dispatch_session_count do + regulars = ExampleApp.regular_users_session_count() + admins = ExampleApp.admin_users_session_count() + :telemetry.execute([:example_app, :session_count], %{count: admins}, %{role: :admin}) + :telemetry.execute([:example_app, :session_count], %{count: regulars}, %{role: :regular}) + end +end +``` + + + +> #### Note {: .info} +> +> The other solution would be to dispatch two different events by hooking up +> `example_app:regular_users_session_count/0` and `example_app:admin_users_session_count/0` +> functions directly.
However, if you add more and more user roles to your app, you'll find +> yourself creating a new event for each one of them, which will force you to modify existing +event handlers. If you can break down event value by some feature, like user role in this +example, it's usually better to use event metadata than add new events. + +This is a perfect use case for poller, because you don't need to write a dedicated process +which would call these functions periodically. Additionally, if you find that you need to collect +more statistics like this in the future, you can easily hook them up to the same poller process +and avoid creating lots of processes which would stay idle most of the time. +"""). + -behaviour(gen_server). %% API @@ -202,28 +289,61 @@ -include_lib("kernel/include/logger.hrl"). +?DOC(""" +The reference to a poller process. +"""). -type t() :: gen_server:server_ref(). + +?DOC(""" +A list of options for the poller. +"""). -type options() :: [option()]. + +?DOC(""" +An option for the poller. +"""). -type option() :: {name, atom() | gen_server:server_name()} | {period, period()} | {init_delay, init_delay()} | {measurements, [measurement()]}. + +?DOC(""" +A measurement for the poller. +"""). -type measurement() :: memory | total_run_queue_lengths | system_counts | {process_info, [{name, atom()} | {event, [atom()]} | {keys, [atom()]}]} | {module(), atom(), list()}. + +?DOC(""" +A period for the poller. +"""). -type period() :: pos_integer(). + +?DOC(""" +An init delay for the poller. +"""). -type init_delay() :: non_neg_integer(). + -type state() :: #{measurements => [measurement()], period => period()}. -%% @doc Starts a poller linked to the calling process. -%% -%% Useful for starting Pollers as a part of a supervision tree. -%% -%% Default options: [{name, telemetry_poller}, {period, timer:seconds(5)}, {init_delay, 0}] +?DOC(""" +Starts a poller linked to the calling process. + +Useful for starting Pollers as a part of a supervision tree. 
+ +## Options + +The default options are: + + * `{name, telemetry_poller}` + * `{period, timer:seconds(5)}` + * `{init_delay, 0}` + +"""). -spec start_link(options()) -> gen_server:start_ret(). start_link(Opts) when is_list(Opts) -> Args = parse_args(Opts), @@ -234,21 +354,24 @@ start_link(Opts) when is_list(Opts) -> false -> gen_server:start_link(?MODULE, Args, []) end. -%% @doc -%% Returns a list of measurements used by the poller. +?DOC(""" +Returns a list of measurements used by the poller. +"""). -spec list_measurements(t()) -> [measurement()]. list_measurements(Poller) -> gen_server:call(Poller, get_measurements). +?DOC(false). -spec init(map()) -> {ok, state()}. init(Args) -> schedule_measurement(maps:get(init_delay, Args)), {ok, #{ measurements => maps:get(measurements, Args), period => maps:get(period, Args)}}. - -%% @doc -%% Returns a child spec for the poller for running under a supervisor. +?DOC(""" + Returns a child spec for the poller for running under a supervisor. +"""). +-spec child_spec(options()) -> supervisor:child_spec(). child_spec(Opts) -> Id = case proplists:get_value(name, Opts) of @@ -343,13 +466,16 @@ make_measurement(Measurement = {M, F, A}) -> error end. +?DOC(false). handle_call(get_measurements, _From, State = #{measurements := Measurements}) -> {reply, Measurements, State}; handle_call(_Request, _From, State) -> {reply, ok, State}. +?DOC(false). handle_cast(_Msg, State) -> {noreply, State}. +?DOC(false). handle_info(collect, State) -> GoodMeasurements = make_measurements_and_filter_misbehaving(maps:get(measurements, State)), schedule_measurement(maps:get(period, State)), @@ -357,6 +483,8 @@ handle_info(collect, State) -> handle_info(_, State) -> {noreply, State}. +?DOC(false). terminate(_Reason, _State) -> ok. +?DOC(false). code_change(_OldVsn, State, _Extra) -> {ok, State}.