You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by va...@apache.org on 2019/05/01 16:01:43 UTC

[couchdb] branch master updated: Handle database re-creation edge case in internal replicator

This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/couchdb.git


The following commit(s) were added to refs/heads/master by this push:
     new 3714ff5  Handle database re-creation edge case in internal replicator
3714ff5 is described below

commit 3714ff53ac7f82654dece7a977eafb95decce113
Author: Nick Vatamaniuc <va...@apache.org>
AuthorDate: Tue Apr 30 19:28:12 2019 -0400

    Handle database re-creation edge case in internal replicator
    
    Previously, if a database was deleted and re-created while the internal
    replication request was pending, the job would have been retried continuously.
    
    mem3:targets_map/2 function would return an empty targets map and mem3_rep:go
    would raise a function clause exception if the database was present but it was
    an older "incarnation" of it (with shards living on different target nodes).
    
    Because it was an exception and not an {error, ...} result, the process would
    exit with an error. Subsequently, mem3_sync would try to handle process exit
    and check if the database was deleted, but it also didn't account for the case
    when the database was re-created, so it would resubmit the job into the queue
    again.
    
    To fix it, we introduce a function to check if the database shard is part of
    the current database shard map. Then perform the check both before building the
    targets map and also on job retries.
---
 src/mem3/src/mem3.erl      | 20 ++++++++++++++++++++
 src/mem3/src/mem3_rep.erl  |  8 +++++++-
 src/mem3/src/mem3_sync.erl | 11 ++++++-----
 3 files changed, 33 insertions(+), 6 deletions(-)

diff --git a/src/mem3/src/mem3.erl b/src/mem3/src/mem3.erl
index dc666fd..6f3a10d 100644
--- a/src/mem3/src/mem3.erl
+++ b/src/mem3/src/mem3.erl
@@ -22,6 +22,7 @@
 -export([belongs/2, owner/3]).
 -export([get_placement/1]).
 -export([ping/1, ping/2]).
+-export([db_is_current/1]).
 
 %% For mem3 use only.
 -export([name/1, node/1, range/1, engine/1]).
@@ -367,6 +368,25 @@ ping(Node, Timeout) when is_atom(Node) ->
             pang
     end.
 
+
+db_is_current(#shard{name = Name}) ->
+    db_is_current(Name);
+
+db_is_current(<<"shards/", _/binary>> = Name) ->
+    try
+        Shards = mem3:shards(mem3:dbname(Name)),
+        lists:keyfind(Name, #shard.name, Shards) =/= false
+    catch
+        error:database_does_not_exist ->
+            false
+    end;
+
+db_is_current(Name) when is_binary(Name) ->
+    % This accounts for local (non-sharded) dbs, and is mostly
+    % for unit tests that either test or use mem3_rep logic
+    couch_server:exists(Name).
+
+
 -ifdef(TEST).
 
 -include_lib("eunit/include/eunit.hrl").
diff --git a/src/mem3/src/mem3_rep.erl b/src/mem3/src/mem3_rep.erl
index d5b42d3..d2edd6c 100644
--- a/src/mem3/src/mem3_rep.erl
+++ b/src/mem3/src/mem3_rep.erl
@@ -64,7 +64,13 @@ go(DbName, Node, Opts) when is_binary(DbName), is_atom(Node) ->
     go(#shard{name=DbName, node=node()}, #shard{name=DbName, node=Node}, Opts);
 
 go(#shard{} = Source, #shard{} = Target, Opts) ->
-    go(Source, targets_map(Source, Target), Opts);
+    case mem3:db_is_current(Source) of
+        true ->
+            go(Source, targets_map(Source, Target), Opts);
+        false ->
+            % Database could have been recreated
+            {error, missing_source}
+    end;
 
 go(#shard{} = Source, #{} = Targets0, Opts) when map_size(Targets0) > 0 ->
     Targets = maps:map(fun(_, T) -> #tgt{shard = T} end, Targets0),
diff --git a/src/mem3/src/mem3_sync.erl b/src/mem3/src/mem3_sync.erl
index 693fc4f..8170f3c 100644
--- a/src/mem3/src/mem3_sync.erl
+++ b/src/mem3/src/mem3_sync.erl
@@ -140,11 +140,12 @@ handle_info({'EXIT', Active, Reason}, State) ->
         case Reason of {pending_changes, Count} ->
             maybe_resubmit(State, Job#job{pid = nil, count = Count});
         _ ->
-            try mem3:shards(mem3:dbname(Job#job.name)) of _ ->
-                timer:apply_after(5000, ?MODULE, push, [Job#job{pid=nil}])
-            catch error:database_does_not_exist ->
-                % no need to retry
-                ok
+            case mem3:db_is_current(Job#job.name) of
+                true ->
+                    timer:apply_after(5000, ?MODULE, push, [Job#job{pid=nil}]);
+                false ->
+                    % no need to retry (db deleted or recreated)
+                    ok
             end,
             State
         end;