You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by wi...@apache.org on 2023/04/13 11:38:06 UTC

[couchdb] branch prometheus_erlang_dist updated (eba421924 -> ff2633874)

This is an automated email from the ASF dual-hosted git repository.

willholley pushed a change to branch prometheus_erlang_dist
in repository https://gitbox.apache.org/repos/asf/couchdb.git


 discard eba421924 feat (prometheus): add Erlang distribution stats
     new ff2633874 feat (prometheus): add Erlang distribution stats

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (eba421924)
            \
             N -- N -- N   refs/heads/prometheus_erlang_dist (ff2633874)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 src/couch_prometheus/src/couch_prometheus_server.erl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)


[couchdb] 01/01: feat (prometheus): add Erlang distribution stats

Posted by wi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

willholley pushed a commit to branch prometheus_erlang_dist
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit ff263387493871921836cacb0a96c49a6d2fb3fd
Author: Will Holley <wi...@uk.ibm.com>
AuthorDate: Wed Apr 12 18:10:26 2023 +0000

    feat (prometheus): add Erlang distribution stats
    
    # Why
    
    The _prometheus endpoint was missing the erlang distribution stats
    returned by the _system endpoint. This is useful when diagnosing
    networking issues between couchdb nodes.
    
    # How
    
    Adds a new function `couch_prometheus_server:get_distribution_stats/0`.
    This gathers the distribution stats in a similar fashion to
    `chttpd_node:get_distribution_stats/0` but formats them in a more
    prometheus-friendly way. Naming convention follows prometheus standards,
    so the type of the value is appended to the metric name and, where
    counter types are used, a "_total" suffix is added.
    
    For example:
    
    ```
    couchdb_erlang_distribution_recv_oct_bytes_total{node="node2@127.0.0.1"} 30609
    couchdb_erlang_distribution_recv_oct_bytes_total{node="node3@127.0.0.1"} 28392
    ```
---
 .../src/couch_prometheus_server.erl                | 110 ++++++++++++++++++++-
 1 file changed, 109 insertions(+), 1 deletion(-)

diff --git a/src/couch_prometheus/src/couch_prometheus_server.erl b/src/couch_prometheus/src/couch_prometheus_server.erl
index 884d792f0..8a17980a1 100644
--- a/src/couch_prometheus/src/couch_prometheus_server.erl
+++ b/src/couch_prometheus/src/couch_prometheus_server.erl
@@ -116,7 +116,8 @@ get_system_stats() ->
         get_vm_stats(),
         get_ets_stats(),
         get_internal_replication_jobs_stat(),
-        get_membership_stat()
+        get_membership_stat(),
+        get_distribution_stats()
     ]).
 
 get_uptime_stat() ->
@@ -264,6 +265,113 @@ get_run_queue_stats() ->
         )
     ].
 
+get_distribution_stats() ->
+    % Each distribution stat is exposed as its own metric, with the
+    % erlang node as a label. This is the inverse of the structure
+    % returned by inet:getstat/1.
+    %
+    % The outer fold accumulates a dict keyed on the socket stat_option
+    % (https://www.erlang.org/doc/man/inet.html#getstat-2) where the value
+    % is a list of labels/value pairs for that stat, e.g.
+    % recv_oct: [{[{node, 'node2@127.0.0.1'}], 30609}, {[{node, 'node3@127.0.0.1'}], 28392}]
+    % recv_cnt: [{[{node, 'node2@127.0.0.1'}], 123}, {[{node, 'node3@127.0.0.1'}], 134}]
+    DistStats = lists:foldl(
+        fun({Node, Socket}, Acc) ->
+            try inet:getstat(Socket) of
+                {ok, Stats} ->
+                    % Append an entry for the current Node under each
+                    % stat key; the inner fold's final accumulator is
+                    % the outer fold's next accumulator.
+                    lists:foldl(
+                        fun({StatOption, Value}, Acc0) ->
+                            dict:append(StatOption, {[{node, Node}], Value}, Acc0)
+                        end,
+                        Acc,
+                        Stats
+                    );
+                {error, _Reason} ->
+                    % an unmatched return would raise try_clause, which
+                    % the catch below does not cover; skip this node
+                    Acc
+            catch
+                _:_ ->
+                    % getstat raised, so there is no result; just continue
+                    Acc
+            end
+        end,
+        dict:new(),
+        erlang:system_info(dist_ctrl)
+    ),
+    [
+        to_prom(
+            erlang_distribution_recv_oct_bytes_total,
+            counter,
+            "Number of bytes received by the socket.",
+            safe_dict_fetch(recv_oct, DistStats)
+        ),
+        to_prom(
+            erlang_distribution_recv_cnt_packets_total,
+            counter,
+            "number of packets received by the socket.",
+            safe_dict_fetch(recv_cnt, DistStats)
+        ),
+        to_prom(
+            erlang_distribution_recv_max_bytes,
+            gauge,
+            "size of the largest packet, in bytes, received by the socket.",
+            safe_dict_fetch(recv_max, DistStats)
+        ),
+        to_prom(
+            erlang_distribution_recv_avg_bytes,
+            gauge,
+            "average size of packets, in bytes, received by the socket.",
+            safe_dict_fetch(recv_avg, DistStats)
+        ),
+        to_prom(
+            erlang_distribution_recv_dvi_bytes,
+            gauge,
+            "average packet size deviation, in bytes, received by the socket.",
+            safe_dict_fetch(recv_dvi, DistStats)
+        ),
+        to_prom(
+            erlang_distribution_send_oct_bytes_total,
+            counter,
+            "Number of bytes sent by the socket.",
+            safe_dict_fetch(send_oct, DistStats)
+        ),
+        to_prom(
+            erlang_distribution_send_cnt_packets_total,
+            counter,
+            "number of packets sent by the socket.",
+            safe_dict_fetch(send_cnt, DistStats)
+        ),
+        to_prom(
+            erlang_distribution_send_max_bytes,
+            gauge,
+            "size of the largest packet, in bytes, sent by the socket.",
+            safe_dict_fetch(send_max, DistStats)
+        ),
+        to_prom(
+            erlang_distribution_send_avg_bytes,
+            gauge,
+            "average size of packets, in bytes, sent by the socket.",
+            safe_dict_fetch(send_avg, DistStats)
+        ),
+        to_prom(
+            erlang_distribution_send_pend_bytes,
+            gauge,
+            "number of bytes waiting to be sent by the socket.",
+            safe_dict_fetch(send_pend, DistStats)
+        )
+    ].
+
+% Returns the value stored under Key in Dict, or [] when Key is absent.
+safe_dict_fetch(Key, Dict) ->
+    case dict:find(Key, Dict) of
+        {ok, Val} -> Val;
+        error -> []
+    end.
+
 get_ets_stats() ->
     NumTabs = length(ets:all()),
     to_prom(erlang_ets_table, gauge, "number of ETS tables", NumTabs).