You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by wi...@apache.org on 2023/03/13 13:16:27 UTC

[couchdb] branch remove-prometheus-duplicates updated (26578f880 -> fad5b33cc)

This is an automated email from the ASF dual-hosted git repository.

willholley pushed a change to branch remove-prometheus-duplicates
in repository https://gitbox.apache.org/repos/asf/couchdb.git


 discard 26578f880 fix: remove duplicate vm_stats from _prometheus
     new fad5b33cc fix: remove duplicate vm_stats from _prometheus

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (26578f880)
            \
             N -- N -- N   refs/heads/remove-prometheus-duplicates (fad5b33cc)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:


[couchdb] 01/01: fix: remove duplicate vm_stats from _prometheus

Posted by wi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

willholley pushed a commit to branch remove-prometheus-duplicates
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit fad5b33cce756376c23ae2b446b38483066d29ba
Author: Will Holley <wi...@apache.org>
AuthorDate: Fri Mar 10 09:10:40 2023 +0000

    fix: remove duplicate vm_stats from _prometheus
    
    The `_node/_local/_prometheus` endpoint was returning duplicate rows for the
    following metrics:
    
    ```
    couchdb_erlang_memory_bytes
    couchdb_erlang_gc_collections_total
    couchdb_erlang_gc_words_reclaimed_total
    couchdb_erlang_context_switches_total
    couchdb_erlang_reductions_total
    couchdb_erlang_processes
    couchdb_erlang_process_limit
    ```
    
    Prometheus will gracefully handle the duplication, picking the first
    entry only, but it bloats the response and can potentially cause
    unexpected results if there's a significant delay capturing the
    samples.
    
    The duplication is caused by a duplicate function call to
    `get_vm_stats()` in the prometheus endpoint handler. Removing the
    duplicate call fixes the problem.
---
 src/couch_prometheus/src/couch_prometheus_server.erl  |  1 -
 .../test/eunit/couch_prometheus_e2e_tests.erl         | 19 ++++++++++++++++++-
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/src/couch_prometheus/src/couch_prometheus_server.erl b/src/couch_prometheus/src/couch_prometheus_server.erl
index 7a0eb4bf9..7597c7e28 100644
--- a/src/couch_prometheus/src/couch_prometheus_server.erl
+++ b/src/couch_prometheus/src/couch_prometheus_server.erl
@@ -108,7 +108,6 @@ get_couchdb_stats() ->
 get_system_stats() ->
     lists:flatten([
         get_uptime_stat(),
-        get_vm_stats(),
         get_io_stats(),
         get_message_queue_stats(),
         get_run_queue_stats(),
diff --git a/src/couch_prometheus/test/eunit/couch_prometheus_e2e_tests.erl b/src/couch_prometheus/test/eunit/couch_prometheus_e2e_tests.erl
index 5f458ba8c..09c48a5cb 100644
--- a/src/couch_prometheus/test/eunit/couch_prometheus_e2e_tests.erl
+++ b/src/couch_prometheus/test/eunit/couch_prometheus_e2e_tests.erl
@@ -39,7 +39,8 @@ e2e_test_() ->
                 [
                     ?TDEF_FE(t_chttpd_port),
                     ?TDEF_FE(t_prometheus_port),
-                    ?TDEF_FE(t_metric_updated)
+                    ?TDEF_FE(t_metric_updated),
+                    ?TDEF_FE(t_no_duplicate_metrics)
                 ]
             }
         }
@@ -105,6 +106,22 @@ t_reject_prometheus_port(Port) ->
     Response = test_request:get(node_local_url(Port), [?CONTENT_JSON, ?AUTH]),
     ?assertEqual({error, {conn_failed, {error, econnrefused}}}, Response).
 
+t_no_duplicate_metrics(Port) ->
+    Url = node_local_url(Port),
+    Stats = get_stats(Url),
+    Lines = re:split(Stats, "\n"),
+    % Filter the result to only the lines containing the metric
+    % definition, not the values. These lines always start with
+    % a # character.
+    MetricDefs = lists:filter(fun(S) -> string:find(S, "#") =:= S end, Lines),
+    ?assertNotEqual(erlang:length(MetricDefs), 0),
+    Diff = get_duplicates(MetricDefs),
+    ?debugVal(Diff),
+    ?assertEqual(erlang:length(Diff), 0).
+
+get_duplicates(List) ->
+    List -- sets:to_list(sets:from_list(List)).
+
 t_metric_updated(Port) ->
     % The passage of time should increment this metric
     Metric = "couchdb_uptime_seconds",