You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by ko...@apache.org on 2021/10/15 20:57:05 UTC

[couchdb] 01/01: Minimize rewinds when a node is down

This is an automated email from the ASF dual-hosted git repository.

kocolosk pushed a commit to branch 3788-changes-rewind-nodedown
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit c12a19ce4f5475aeee9cb40e7c1b02748f782594
Author: Adam Kocoloski <ko...@apache.org>
AuthorDate: Fri Oct 15 16:55:50 2021 -0400

    Minimize rewinds when a node is down
    
    Our existing logic for handling rewinds in the changes feed addresses
    the following cases:
    
    - A node that contributed to a sequence is in maintenance mode
    - A shard that contributed to a sequence has been split
    
    This patch adds support for cases where the node that contributed to a
    client-supplied sequence is down at the beginning of the request
    handling. It reuses the same logic as the maintenance mode case as these
    two situations really ought to be handled the same way.
    
    A future improvement would be to unify the "node down" and "shard split"
    logic so that we could handle the compound case, e.g. replacing a shard
    from a down node with a pair of shards from nodes that cover the same
    range.
    
    Fixes #3788
---
 src/fabric/src/fabric_view_changes.erl | 39 ++++++++++++++++++----------------
 1 file changed, 21 insertions(+), 18 deletions(-)

diff --git a/src/fabric/src/fabric_view_changes.erl b/src/fabric/src/fabric_view_changes.erl
index beeaece..af40db5 100644
--- a/src/fabric/src/fabric_view_changes.erl
+++ b/src/fabric/src/fabric_view_changes.erl
@@ -143,30 +143,17 @@ send_changes(DbName, ChangesArgs, Callback, PackedSeqs, AccIn, Timeout) ->
          Ref = rexi:cast(N, {fabric_rpc, changes, [Name, ChangesArgs, Arg]}),
          {S#shard{ref = Ref}, Seq}
     end, WSplitSeqs0),
-    % For ranges that were not split start sequences from 0
-    WReps = lists:map(fun(#shard{name = Name, node = N} = S) ->
-         Ref = rexi:cast(N, {fabric_rpc, changes, [Name, ChangesArgs, 0]}),
+    % For ranges that were not split, look for a replacement on a different node
+    WReps = lists:map(fun(#shard{name = Name, node = NewNode, range = R} = S) ->
+         Arg = find_replacement_sequence(Dead, R),
+         Ref = rexi:cast(NewNode, {fabric_rpc, changes, [Name, ChangesArgs, Arg]}),
          {S#shard{ref = Ref}, 0}
     end, Reps1),
     Seqs = WSeqs ++ WSplitSeqs ++ WReps,
     {Workers0, _} = lists:unzip(Seqs),
     Repls = fabric_ring:get_shard_replacements(DbName, Workers0),
     StartFun = fun(#shard{name=Name, node=N, range=R0}=Shard) ->
-        %% Find the original shard copy in the Seqs array
-        case lists:dropwhile(fun({S, _}) -> S#shard.range =/= R0 end, Seqs) of
-            [{#shard{}, {replace, _, _, _}} | _] ->
-                % Don't attempt to replace a replacement
-                SeqArg = 0;
-            [{#shard{node = OldNode}, OldSeq} | _] ->
-                SeqArg = make_replacement_arg(OldNode, OldSeq);
-            _ ->
-                % TODO this clause is probably unreachable in the N>2
-                % case because we compute replacements only if a shard has one
-                % in the original set.
-                couch_log:error("Streaming ~s from zero while replacing ~p",
-                    [Name, PackedSeqs]),
-                SeqArg = 0
-        end,
+        SeqArg = find_replacement_sequence(Seqs, R0),
         Ref = rexi:cast(N, {fabric_rpc, changes, [Name, ChangesArgs, SeqArg]}),
         Shard#shard{ref = Ref}
     end,
@@ -651,6 +638,22 @@ find_split_shard_replacements(DeadWorkers, Shards) ->
     {fabric_dict:from_list(Workers), Available}.
 
 
+find_replacement_sequence(OriginalSeqs, R0) ->
+    %% Skip {Shard, Seq} pairs until the first one whose shard range is R0
+    case lists:dropwhile(fun({S, _}) -> S#shard.range =/= R0 end, OriginalSeqs) of
+        [{#shard{}, {replace, _, _, _}} | _] ->
+            % Don't attempt to replace a replacement; start over from zero
+            0;
+        [{#shard{node = OldNode}, OldSeq} | _] ->
+            make_replacement_arg(OldNode, OldSeq);
+        _ ->
+            % TODO we don't currently attempt to replace a shard with split
+            % replicas of that range on other nodes, so no entry may match
+            % R0 and the list can be empty here; fall back to zero.
+            0
+    end.
+
+
 make_split_seq({Num, Uuid, Node}, RepCount) when RepCount > 1 ->
     {Num, {split, Uuid}, Node};
 make_split_seq(Seq, _) ->