You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by wi...@apache.org on 2023/04/13 05:46:51 UTC

[couchdb] 03/03: feat (prometheus): add Erlang distribution stats

This is an automated email from the ASF dual-hosted git repository.

willholley pushed a commit to branch prometheus_erlang_dist
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit eba421924bf513e77c88981e1d790426ff3283ca
Author: Will Holley <wi...@uk.ibm.com>
AuthorDate: Wed Apr 12 18:10:26 2023 +0000

    feat (prometheus): add Erlang distribution stats
    
    # Why
    
    The _prometheus endpoint was missing the erlang distribution stats
    returned by the _system endpoint. This is useful when diagnosing
    networking issues between couchdb nodes.
    
    # How
    
    Adds a new function `couch_prometheus_server:get_distribution_stats/0`.
    This gathers the distribution stats in a similar fashion to
    `chttpd_node:get_distribution_stats/0` but formats them in a more
    prometheus-friendly way. Naming follows the Prometheus conventions,
    so the unit of the value is appended to the metric name and, where
    counter types are used, a "_total" suffix is added.
    
    For example:
    
    ```
    couchdb_erlang_distribution_recv_oct_bytes_total{node="node2@127.0.0.1"} 30609
    couchdb_erlang_distribution_recv_oct_bytes_total{node="node3@127.0.0.1"} 28392
    ```
---
 .../src/couch_prometheus_server.erl                | 110 ++++++++++++++++++++-
 1 file changed, 109 insertions(+), 1 deletion(-)

diff --git a/src/couch_prometheus/src/couch_prometheus_server.erl b/src/couch_prometheus/src/couch_prometheus_server.erl
index 884d792f0..ac90ae5fe 100644
--- a/src/couch_prometheus/src/couch_prometheus_server.erl
+++ b/src/couch_prometheus/src/couch_prometheus_server.erl
@@ -116,7 +116,8 @@ get_system_stats() ->
         get_vm_stats(),
         get_ets_stats(),
         get_internal_replication_jobs_stat(),
-        get_membership_stat()
+        get_membership_stat(),
+        get_distribution_stats()
     ]).
 
 get_uptime_stat() ->
@@ -264,6 +265,113 @@ get_run_queue_stats() ->
         )
     ].
 
+get_distribution_stats() ->
+    % Each distribution metric has a different type, so each is exposed as
+    % its own metric with the Erlang node as a label. This is the inverse
+    % of the structure returned by inet:getstat/1.
+    % The outer fold walks the {Node, Socket} pairs reported by
+    % erlang:system_info(dist_ctrl) and accumulates a dict keyed on the
+    % socket stat_option (https://www.erlang.org/doc/man/inet.html#getstat-2)
+    % where the value is a list of labels/value pairs for that stat, e.g.
+    % recv_oct: [{[{node, 'node2@127.0.0.1'}], 30609}, {[{node, 'node3@127.0.0.1'}], 28392}]
+    % recv_cnt: [{[{node, 'node2@127.0.0.1'}], 123}, {[{node, 'node3@127.0.0.1'}], 134}]
+    % Sockets whose stats cannot be read contribute no samples.
+    % Returns the list of to_prom/4 results, one per stat_option.
+    DistStats = lists:foldl(
+        fun({Node, Socket}, Acc) ->
+            try inet:getstat(Socket) of
+                {ok, Stats} ->
+                    % Append this node's sample to the entry for each
+                    % stat_option; the inner fold returns the updated dict.
+                    lists:foldl(
+                        fun({StatOption, Value}, Acc0) ->
+                            dict:append(StatOption, {[{node, Node}], Value}, Acc0)
+                        end,
+                        Acc,
+                        Stats
+                    );
+                {error, _Reason} ->
+                    % skip this socket; an unmatched result would raise an uncaught try_clause
+                    Acc
+            catch
+                _:_ ->
+                    % no result so just continue
+                    Acc
+            end
+        end,
+        dict:new(),
+        erlang:system_info(dist_ctrl)
+    ),
+    [
+        to_prom(
+            erlang_distribution_recv_oct_bytes_total,
+            counter,
+            "Number of bytes received by the socket.",
+            safe_dict_fetch(recv_oct, DistStats)
+        ),
+        to_prom(
+            erlang_distribution_recv_cnt_packets_total,
+            counter,
+            "number of packets received by the socket.",
+            safe_dict_fetch(recv_cnt, DistStats)
+        ),
+        to_prom(
+            erlang_distribution_recv_max_bytes,
+            gauge,
+            "size of the largest packet, in bytes, received by the socket.",
+            safe_dict_fetch(recv_max, DistStats)
+        ),
+        to_prom(
+            erlang_distribution_recv_avg_bytes,
+            gauge,
+            "average size of packets, in bytes, received by the socket.",
+            safe_dict_fetch(recv_avg, DistStats)
+        ),
+        to_prom(
+            erlang_distribution_recv_dvi_bytes,
+            gauge,
+            "average packet size deviation, in bytes, received by the socket.",
+            safe_dict_fetch(recv_dvi, DistStats)
+        ),
+        to_prom(
+            erlang_distribution_send_oct_bytes_total,
+            counter,
+            "Number of bytes sent by the socket.",
+            safe_dict_fetch(send_oct, DistStats)
+        ),
+        to_prom(
+            erlang_distribution_send_cnt_packets_total,
+            counter,
+            "number of packets sent by the socket.",
+            safe_dict_fetch(send_cnt, DistStats)
+        ),
+        to_prom(
+            erlang_distribution_send_max_bytes,
+            gauge,
+            "size of the largest packet, in bytes, sent by the socket.",
+            safe_dict_fetch(send_max, DistStats)
+        ),
+        to_prom(
+            erlang_distribution_send_avg_bytes,
+            gauge,
+            "average size of packets, in bytes, sent by the socket.",
+            safe_dict_fetch(send_avg, DistStats)
+        ),
+        to_prom(
+            erlang_distribution_send_dvi_bytes,
+            gauge,
+            "average packet size deviation, in bytes, sent by the socket.",
+            safe_dict_fetch(send_dvi, DistStats)
+        )
+    ].
+
+safe_dict_fetch(Key, Dict) ->
+    % Value stored under Key, or [] when Key is absent from Dict.
+    case dict:find(Key, Dict) of
+        {ok, Val} -> Val;
+        error -> []
+    end.
+
 get_ets_stats() ->
     NumTabs = length(ets:all()),
     to_prom(erlang_ets_table, gauge, "number of ETS tables", NumTabs).