You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by va...@apache.org on 2021/10/28 21:57:32 UTC

[couchdb] branch fix-custodian-hard-coded-dbs created (now d6af550)

This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a change to branch fix-custodian-hard-coded-dbs
in repository https://gitbox.apache.org/repos/asf/couchdb.git.


      at d6af550  Eliminate custodian false positive errors for dbs with N < default N

This branch includes the following new commits:

     new 1ba0e4a  Use configured shards db in custodian instead of `"dbs"`
     new d6af550  Eliminate custodian false positive errors for dbs with N < default N

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


[couchdb] 02/02: Eliminate custodian false positive errors for dbs with N < default N

Posted by va...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch fix-custodian-hard-coded-dbs
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit d6af5506014e918ac8f3acae90b6929c4f38accd
Author: Nick Vatamaniuc <va...@gmail.com>
AuthorDate: Thu Oct 28 16:54:53 2021 -0400

    Eliminate custodian false positive errors for dbs with N < default N
    
    Previously, dbs with N < cluster default N would pollute logs with critical
    errors regarding not having enough shards. Instead, use each database's
    expected N value to emit custodian reports.
    
    Note: the expected N value is a bit tricky to understand since, with the
    shard splitting feature, shard ranges are not guaranteed to match exactly for
    all copies. The N value is then defined as the max number of rings which can
    be completed with the given set of shards -- complete the ring once, remove
    the participating shards, try again, etc. Luckily for us, that function is
    already written (`mem3_util:calculate_max_n(Shards)`), so we are just re-using it.
---
 src/custodian/src/custodian_util.erl | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/custodian/src/custodian_util.erl b/src/custodian/src/custodian_util.erl
index ac46cb1..6d5a560 100644
--- a/src/custodian/src/custodian_util.erl
+++ b/src/custodian/src/custodian_util.erl
@@ -21,7 +21,7 @@
 % Old design doc which should be cleaned up
 -define(CUSTODIAN_ID, <<"_design/custodian">>).
 
--record(state, {live, safe, n, callback, db, acc}).
+-record(state, {live, safe, callback, db, acc}).
 
 %% public functions.
 
@@ -55,10 +55,9 @@ ensure_dbs_exists() ->
 fold_dbs(Acc, Fun) ->
     Safe = maybe_redirect([node() | nodes()]),
     Live = Safe -- maintenance_nodes(Safe),
-    N = cluster_n(),
     {ok, Db} = ensure_dbs_exists(),
     try
-        State0 = #state{live=Live, safe=Safe, n=N, callback=Fun, db=Db, acc=Acc},
+        State0 = #state{live=Live, safe=Safe, callback=Fun, db=Db, acc=Acc},
         {ok, State1} = couch_db:fold_docs(Db, fun fold_dbs1/2, State0, []),
         State1#state.acc
     after
@@ -82,9 +81,9 @@ fold_dbs1(#full_doc_info{id = Id} = FDI, State) ->
 fold_dbs(Id, Shards, State) ->
     IsSafe = fun(#shard{node = N}) -> lists:member(N, State#state.safe) end,
     IsLive = fun(#shard{node = N}) -> lists:member(N, State#state.live) end,
-    TargetN = State#state.n,
     LiveShards = lists:filter(IsLive, Shards),
     SafeShards = lists:filter(IsSafe, Shards),
+    TargetN = mem3_util:calculate_max_n(Shards),
     Acc0 = State#state.acc,
     Acc1 = case mem3_util:calculate_max_n(LiveShards) of
         LiveN when LiveN < TargetN ->

[couchdb] 01/02: Use configured shards db in custodian instead of `"dbs"`

Posted by va...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch fix-custodian-hard-coded-dbs
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 1ba0e4ac4c919cec430dd94a0a247704b059a75a
Author: Nick Vatamaniuc <va...@gmail.com>
AuthorDate: Thu Oct 28 16:53:55 2021 -0400

    Use configured shards db in custodian instead of `"dbs"`
---
 src/custodian/README                   | 2 +-
 src/custodian/src/custodian_server.erl | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/custodian/README b/src/custodian/README
index 72681f4..ff88373 100644
--- a/src/custodian/README
+++ b/src/custodian/README
@@ -1,6 +1,6 @@
 Custodian is responsible for the data stored in CouchDB databases.
 
-Custodian scans the "dbs" database, which details the location of
+Custodian scans the shards database, which details the location of
 every shard of every database and ensures that operators are aware of
 any shard that is under-replicated (has less than N copies).
 
diff --git a/src/custodian/src/custodian_server.erl b/src/custodian/src/custodian_server.erl
index 0a21eed..0c8b87e 100644
--- a/src/custodian/src/custodian_server.erl
+++ b/src/custodian/src/custodian_server.erl
@@ -132,8 +132,9 @@ start_shard_checker(#state{shard_checker=Pid}=State) when is_pid(Pid) ->
 
 
 start_event_listener() ->
+    DbName = mem3_sync:shards_db(),
     couch_event:link_listener(
-            ?MODULE, handle_db_event, nil, [{dbname, <<"dbs">>}]
+            ?MODULE, handle_db_event, nil, [{dbname, DbName}]
         ).
 
 handle_db_event(_DbName, updated, _St) ->