You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@couchdb.apache.org by va...@apache.org on 2022/10/17 20:51:30 UTC

[couchdb] branch main updated (e2d88138b -> f3950c520)

This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a change to branch main
in repository https://gitbox.apache.org/repos/asf/couchdb.git


    from e2d88138b Make test prometheus port explicit
     new 9636405b2 Avoid refresh messages piling up in prometheus server
     new 4a9bef0fe Fix prometheus message queue len race condition
     new f3950c520 Add a few more prometheus tests

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../src/couch_prometheus_server.erl                | 71 ++++++++++++++++++----
 1 file changed, 59 insertions(+), 12 deletions(-)

[couchdb] 01/03: Avoid refresh messages piling up in prometheus server

Posted by va...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 9636405b29f9157eb7870995d837cb53a77cf1f2
Author: Nick Vatamaniuc <va...@gmail.com>
AuthorDate: Mon Oct 17 13:57:18 2022 -0400

    Avoid refresh messages piling up in prometheus server
    
    Previously, the cast `refresh` message didn't explicitly cancel the previous
    timer. If a `gen_server:call(Server, refresh)` was made and a `refresh` message
    was already in the message queue, it was possible to end up with two parallel
    refresh periods (cycles): one with the regular refresh period, the other
    generated by the regular refresh gen_server call.
    
    In addition, drain all the previous refresh messages before scheduling a new
    timer. This should further help lower the chance of refresh cycles piling up on
    top of each other.
    
    The fix is mostly theoretical as technically I don't think anything triggers a
    `refresh` gen_server:call/2 calls in practice. It would have to be a manual or
    an externally scripted call.
---
 src/couch_prometheus/src/couch_prometheus_server.erl | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/couch_prometheus/src/couch_prometheus_server.erl b/src/couch_prometheus/src/couch_prometheus_server.erl
index 701483a38..969f68ec9 100644
--- a/src/couch_prometheus/src/couch_prometheus_server.erl
+++ b/src/couch_prometheus/src/couch_prometheus_server.erl
@@ -70,7 +70,8 @@ handle_call(Msg, _From, State) ->
 handle_cast(Msg, State) ->
     {stop, {unknown_cast, Msg}, State}.
 
-handle_info(refresh, State) ->
+handle_info(refresh, #st{refresh = OldRT} = State) ->
+    timer:cancel(OldRT),
     Metrics = refresh_metrics(),
     RT = update_refresh_timer(),
     {noreply, State#st{metrics = Metrics, refresh = RT}};
@@ -184,6 +185,14 @@ get_ets_stats() ->
     NumTabs = length(ets:all()),
     to_prom(erlang_ets_table, gauge, NumTabs).
 
+drain_refresh_messages() ->
+    receive
+        refresh -> drain_refresh_messages()
+    after 0 ->
+        ok
+    end.
+
 update_refresh_timer() ->
+    drain_refresh_messages(),
     RefreshTime = 1000 * config:get_integer("couch_prometheus", "interval", ?REFRESH_INTERVAL),
     erlang:send_after(RefreshTime, self(), refresh).

[couchdb] 02/03: Fix prometheus message queue len race condition

Posted by va...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 4a9bef0fe02832111c0c52dd0e0215e32693bffd
Author: Nick Vatamaniuc <va...@gmail.com>
AuthorDate: Mon Oct 17 14:46:43 2022 -0400

    Fix prometheus message queue len race condition
    
    By the time we get the message queue length the registered process might have
    died already. In that case `whereis/1` would return `undefined` and would crash
    the gen_server process.
---
 .../src/couch_prometheus_server.erl                | 23 +++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/src/couch_prometheus/src/couch_prometheus_server.erl b/src/couch_prometheus/src/couch_prometheus_server.erl
index 969f68ec9..fbca9eb66 100644
--- a/src/couch_prometheus/src/couch_prometheus_server.erl
+++ b/src/couch_prometheus/src/couch_prometheus_server.erl
@@ -149,23 +149,24 @@ get_io_stats() ->
     ].
 
 get_message_queue_stats() ->
-    Queues = lists:map(
-        fun(Name) ->
-            case process_info(whereis(Name), message_queue_len) of
-                {message_queue_len, N} ->
-                    N;
-                _ ->
-                    0
-            end
-        end,
-        registered()
-    ),
+    QLenFun = fun(Name) -> message_queue_len(whereis(Name)) end,
+    Queues = lists:map(QLenFun, registered()),
     [
         to_prom(erlang_message_queues, gauge, lists:sum(Queues)),
         to_prom(erlang_message_queue_min, gauge, lists:min(Queues)),
         to_prom(erlang_message_queue_max, gauge, lists:max(Queues))
     ].
 
+message_queue_len(undefined) ->
+    0;
+message_queue_len(Pid) when is_pid(Pid) ->
+    case process_info(Pid, message_queue_len) of
+        {message_queue_len, N} ->
+            N;
+        _ ->
+            0
+    end.
+
 get_run_queue_stats() ->
     %% Workaround for https://bugs.erlang.org/browse/ERL-1355
     {Normal, Dirty} =

[couchdb] 03/03: Add a few more prometheus tests

Posted by va...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit f3950c52057f52fe4c5d8fa00946c5c1ea5e176e
Author: Nick Vatamaniuc <va...@gmail.com>
AuthorDate: Mon Oct 17 16:05:17 2022 -0400

    Add a few more prometheus tests
---
 .../src/couch_prometheus_server.erl                | 39 +++++++++++++++++++++-
 1 file changed, 38 insertions(+), 1 deletion(-)

diff --git a/src/couch_prometheus/src/couch_prometheus_server.erl b/src/couch_prometheus/src/couch_prometheus_server.erl
index fbca9eb66..d13d11941 100644
--- a/src/couch_prometheus/src/couch_prometheus_server.erl
+++ b/src/couch_prometheus/src/couch_prometheus_server.erl
@@ -160,7 +160,7 @@ get_message_queue_stats() ->
 message_queue_len(undefined) ->
     0;
 message_queue_len(Pid) when is_pid(Pid) ->
-    case process_info(Pid, message_queue_len) of
+    case erlang:process_info(Pid, message_queue_len) of
         {message_queue_len, N} ->
             N;
         _ ->
@@ -197,3 +197,40 @@ update_refresh_timer() ->
     drain_refresh_messages(),
     RefreshTime = 1000 * config:get_integer("couch_prometheus", "interval", ?REFRESH_INTERVAL),
     erlang:send_after(RefreshTime, self(), refresh).
+
+-ifdef(TEST).
+
+-include_lib("couch/include/couch_eunit.hrl").
+
+system_stats_test() ->
+    lists:foreach(
+        fun(Line) ->
+            ?assert(is_binary(Line)),
+            ?assert((starts_with(<<"couchdb_">>, Line) orelse starts_with(<<"# TYPE ">>, Line)))
+        end,
+        get_system_stats()
+    ).
+
+starts_with(Prefix, Line) when is_binary(Prefix), is_binary(Line) ->
+    binary:longest_common_prefix([Prefix, Line]) > 0.
+
+message_queue_len_test() ->
+    self() ! refresh,
+    ?assert(message_queue_len(self()) >= 1),
+    ?assertEqual(0, message_queue_len(undefined)),
+    {Pid, Ref} = spawn_monitor(fun() -> ok end),
+    receive
+        {'DOWN', Ref, process, Pid, _} ->
+            ok
+    end,
+    ?assertEqual(0, message_queue_len(Pid)).
+
+drain_refresh_messages_test() ->
+    self() ! refresh,
+    {messages, Mq0} = erlang:process_info(self(), messages),
+    ?assert(lists:member(refresh, Mq0)),
+    drain_refresh_messages(),
+    {messages, Mq1} = erlang:process_info(self(), messages),
+    ?assert(not lists:member(refresh, Mq1)).
+
+-endif.