You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by wi...@apache.org on 2023/04/13 11:38:07 UTC
[couchdb] 01/01: feat (prometheus): add Erlang distribution stats
This is an automated email from the ASF dual-hosted git repository.
willholley pushed a commit to branch prometheus_erlang_dist
in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit ff263387493871921836cacb0a96c49a6d2fb3fd
Author: Will Holley <wi...@uk.ibm.com>
AuthorDate: Wed Apr 12 18:10:26 2023 +0000
feat (prometheus): add Erlang distribution stats
# Why
The _prometheus endpoint was missing the erlang distribution stats
returned by the _system endpoint. This is useful when diagnosing
networking issues between couchdb nodes.
# How
Adds a new function `couch_prometheus_server:get_distribution_stats/0`.
This gathers the distribution stats in a similar fashion to
`chttpd_node:get_distribution_stats/0` but formats them in a more
prometheus-friendly way. Naming convention follows prometheus standards,
so the unit of the value (e.g. bytes, packets) is appended to the metric name and, where
counter types are used, a "_total" suffix is added.
For example:
```
couchdb_erlang_distribution_recv_oct_bytes_total{node="node2@127.0.0.1"} 30609
couchdb_erlang_distribution_recv_oct_bytes_total{node="node3@127.0.0.1"} 28392
```
---
.../src/couch_prometheus_server.erl | 110 ++++++++++++++++++++-
1 file changed, 109 insertions(+), 1 deletion(-)
diff --git a/src/couch_prometheus/src/couch_prometheus_server.erl b/src/couch_prometheus/src/couch_prometheus_server.erl
index 884d792f0..8a17980a1 100644
--- a/src/couch_prometheus/src/couch_prometheus_server.erl
+++ b/src/couch_prometheus/src/couch_prometheus_server.erl
@@ -116,7 +116,8 @@ get_system_stats() ->
get_vm_stats(),
get_ets_stats(),
get_internal_replication_jobs_stat(),
- get_membership_stat()
+ get_membership_stat(),
+ get_distribution_stats()
]).
get_uptime_stat() ->
@@ -264,6 +265,113 @@ get_run_queue_stats() ->
)
].
+get_distribution_stats() ->
+    % Exposes one Prometheus metric per inet stat option, with the remote
+    % Erlang node as a label. inet:getstat/1 reports the stats grouped per
+    % socket (i.e. per node), so the fold below inverts that structure into
+    % a dict keyed on the socket stat option
+    % (https://www.erlang.org/doc/man/inet.html#getstat-2) whose values are
+    % lists of {Labels, Value} pairs, e.g.
+    %   recv_oct: [{[{node, 'node2@127.0.0.1'}], 30609}, {[{node, 'node3@127.0.0.1'}], 28392}]
+    %   recv_cnt: [{[{node, 'node2@127.0.0.1'}], 123}, {[{node, 'node3@127.0.0.1'}], 134}]
+    DistStats = lists:foldl(
+        fun({Node, Socket}, Acc) ->
+            try inet:getstat(Socket) of
+                {ok, Stats} ->
+                    % Append this node's entry under every stat option;
+                    % the inner fold's result is the updated accumulator.
+                    lists:foldl(
+                        fun({StatOption, Value}, Acc0) ->
+                            dict:append(StatOption, {[{node, Node}], Value}, Acc0)
+                        end,
+                        Acc,
+                        Stats
+                    );
+                {error, _Reason} ->
+                    % e.g. the socket was closed before it could be read.
+                    % The catch below does not cover an unmatched `of`
+                    % result (try_clause), so skip the node explicitly.
+                    Acc
+            catch
+                _:_ ->
+                    % getstat raised, so there is no result: skip the node
+                    Acc
+            end
+        end,
+        dict:new(),
+        erlang:system_info(dist_ctrl)
+    ),
+    % A stat option with no collected entries is passed to to_prom/4 as [].
+    [
+        to_prom(
+            erlang_distribution_recv_oct_bytes_total,
+            counter,
+            "Number of bytes received by the socket.",
+            safe_dict_fetch(recv_oct, DistStats)
+        ),
+        to_prom(
+            erlang_distribution_recv_cnt_packets_total,
+            counter,
+            "number of packets received by the socket.",
+            safe_dict_fetch(recv_cnt, DistStats)
+        ),
+        to_prom(
+            erlang_distribution_recv_max_bytes,
+            gauge,
+            "size of the largest packet, in bytes, received by the socket.",
+            safe_dict_fetch(recv_max, DistStats)
+        ),
+        to_prom(
+            erlang_distribution_recv_avg_bytes,
+            gauge,
+            "average size of packets, in bytes, received by the socket.",
+            safe_dict_fetch(recv_avg, DistStats)
+        ),
+        to_prom(
+            erlang_distribution_recv_dvi_bytes,
+            gauge,
+            "average packet size deviation, in bytes, received by the socket.",
+            safe_dict_fetch(recv_dvi, DistStats)
+        ),
+        to_prom(
+            erlang_distribution_send_oct_bytes_total,
+            counter,
+            "Number of bytes sent by the socket.",
+            safe_dict_fetch(send_oct, DistStats)
+        ),
+        to_prom(
+            erlang_distribution_send_cnt_packets_total,
+            counter,
+            "number of packets sent by the socket.",
+            safe_dict_fetch(send_cnt, DistStats)
+        ),
+        to_prom(
+            erlang_distribution_send_max_bytes,
+            gauge,
+            "size of the largest packet, in bytes, sent by the socket.",
+            safe_dict_fetch(send_max, DistStats)
+        ),
+        to_prom(
+            erlang_distribution_send_avg_bytes,
+            gauge,
+            "average size of packets, in bytes, sent by the socket.",
+            safe_dict_fetch(send_avg, DistStats)
+        ),
+        to_prom(
+            erlang_distribution_send_pend_bytes,
+            gauge,
+            "number of bytes waiting to be sent by the socket.",
+            safe_dict_fetch(send_pend, DistStats)
+        )
+    ].
+
+safe_dict_fetch(Key, Dict) ->
+    % Returns the value stored under Key in Dict, or [] when Key is absent.
+    case dict:find(Key, Dict) of
+        {ok, Val} -> Val;
+        error -> []
+    end.
+
get_ets_stats() ->
NumTabs = length(ets:all()),
to_prom(erlang_ets_table, gauge, "number of ETS tables", NumTabs).