Posted to commits@couchdb.apache.org by va...@apache.org on 2020/01/10 17:58:23 UTC

[couchdb] branch reset-target-shard-on-init-copy-retries created (now b0fa100)

This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a change to branch reset-target-shard-on-init-copy-retries
in repository https://gitbox.apache.org/repos/asf/couchdb.git.


      at b0fa100  When shard splitting, make sure to reset the targets before any retries

This branch includes the following new commits:

     new b0fa100  When shard splitting, make sure to reset the targets before any retries

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[couchdb] 01/01: When shard splitting, make sure to reset the targets before any retries

Posted by va...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch reset-target-shard-on-init-copy-retries
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit b0fa10076a5f0d9403c157becc949a17c250cd88
Author: Nick Vatamaniuc <va...@apache.org>
AuthorDate: Fri Jan 10 12:53:46 2020 -0500

    When shard splitting, make sure to reset the targets before any retries
    
    Previously, the target was reset only when the whole job started, but not
    when the initial copy phase restarted on its own. If that happened, we
    left the target around, so the retry always failed with the `eexist`
    error.

    Target reset includes a check that the shards are not in the global shard
    map, in case someone added them manually, for example. If they are found
    there, the job panics and exits.
---
 src/mem3/src/mem3_reshard_job.erl | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)
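
For context, the target reset mentioned in the commit message behaves roughly
like the sketch below. This is a simplified illustration, not the verbatim
mem3_reshard_job.erl source: in_shard_map/1 is a hypothetical helper written
for this sketch, and couch_server:exists/1, couch_db_split:cleanup_target/2,
mem3:dbname/1 and mem3:local_shards/1 are the kinds of calls the module relies
on, shown here under that assumption. The records come from the mem3 headers
(#shard{} from mem3.hrl, #job{} from mem3_reshard.hrl).

    %% Sketch: for each target shard, crash if it is already in the
    %% global shard map, delete it if it only exists as a leftover
    %% file from a previous attempt, and otherwise do nothing.
    reset_target(#job{source = Source, target = Targets} = Job) ->
        lists:foreach(fun(#shard{name = TName}) ->
            case {couch_server:exists(TName), in_shard_map(TName)} of
                {_, true} ->
                    %% Deleting a mapped shard could lose data, so
                    %% panic and exit instead of cleaning it up.
                    erlang:error({target_present_in_shard_map, TName});
                {true, false} ->
                    %% Stale target from an earlier initial_copy
                    %% attempt; remove it so the retry can recreate it.
                    couch_db_split:cleanup_target(Source#shard.name, TName);
                {false, false} ->
                    ok
            end
        end, Targets),
        Job.

    %% Hypothetical helper: true if the shard is in the global shard map.
    in_shard_map(Name) ->
        try
            Shards = mem3:local_shards(mem3:dbname(Name)),
            lists:member(Name, [S#shard.name || S <- Shards])
        catch
            error:database_does_not_exist ->
                false
        end.
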

diff --git a/src/mem3/src/mem3_reshard_job.erl b/src/mem3/src/mem3_reshard_job.erl
index d3a33d3..aedca21 100644
--- a/src/mem3/src/mem3_reshard_job.erl
+++ b/src/mem3/src/mem3_reshard_job.erl
@@ -184,19 +184,12 @@ run(#job{split_state = CurrState} = Job) ->
 
 
 set_start_state(#job{split_state = State} = Job) ->
-    case {State, maps:get(State, ?STATE_RESTART, undefined)} of
-        {_, undefined} ->
+    case maps:get(State, ?STATE_RESTART, undefined) of
+        undefined ->
             Fmt1 = "~p recover : unknown state ~s",
             couch_log:error(Fmt1, [?MODULE, jobfmt(Job)]),
             erlang:error({invalid_split_job_recover_state, Job});
-        {initial_copy, initial_copy} ->
-            % Since we recover from initial_copy to initial_copy, we need
-            % to reset the target state as initial_copy expects to
-            % create a new target
-            Fmt2 = "~p recover : resetting target ~s",
-            couch_log:notice(Fmt2, [?MODULE, jobfmt(Job)]),
-            reset_target(Job);
-        {_, StartState} ->
+        StartState ->
             Job#job{split_state = StartState}
     end.
 
@@ -403,6 +396,7 @@ initial_copy_impl(#job{source = Source, target = Targets0} = Job) ->
     LogMsg1 = "~p initial_copy started ~s",
     LogArgs1 = [?MODULE, shardsstr(Source, Targets0)],
     couch_log:notice(LogMsg1, LogArgs1),
+    reset_target(Job),
     case couch_db_split:split(SourceName, TMap, fun pickfun/3) of
         {ok, Seq} ->
             LogMsg2 = "~p initial_copy of ~s finished @ seq:~p",
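
Taken together, the two hunks leave set_start_state/1 as below (reconstructed
from the diff above): recovery now only maps the current state to its restart
state, since the target cleanup that used to be special-cased here happens
unconditionally at the top of initial_copy_impl/1.

    %% set_start_state/1 after this commit. The {initial_copy,
    %% initial_copy} clause is gone; reset_target/1 now runs inside
    %% initial_copy_impl/1 itself.
    set_start_state(#job{split_state = State} = Job) ->
        case maps:get(State, ?STATE_RESTART, undefined) of
            undefined ->
                Fmt1 = "~p recover : unknown state ~s",
                couch_log:error(Fmt1, [?MODULE, jobfmt(Job)]),
                erlang:error({invalid_split_job_recover_state, Job});
            StartState ->
                Job#job{split_state = StartState}
        end.

Moving the reset into initial_copy_impl/1 makes that phase idempotent: whether
the job is starting fresh or retrying after a crash, the targets are reset
right before couch_db_split:split/3 creates them, which removes the `eexist`
failure described in the commit message.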