You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by va...@apache.org on 2021/10/08 20:35:02 UTC

[couchdb] branch 3.x updated: Try harder to avoid a change feed rewind after a shard move

This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch 3.x
in repository https://gitbox.apache.org/repos/asf/couchdb.git


The following commit(s) were added to refs/heads/3.x by this push:
     new 28e39d4  Try harder to avoid a change feed rewind after a shard move
28e39d4 is described below

commit 28e39d47e6d7297ccf283cfa5aa73b447584006a
Author: Nick Vatamaniuc <va...@gmail.com>
AuthorDate: Fri Oct 8 13:54:33 2021 -0400

    Try harder to avoid a change feed rewind after a shard move
    
    In the previous attempt [1] we improved the logic by spawning workers on the
    matching target shards only. However, that wasn't enough as workers might still
    reject the passed in sequence from the old node when it asserts ownership
    locally on each shard.
    
    Re-use the already existing replacement clause, where after uuid is matched, we
    try harder to find the highest viable sequence. To use the unit test setup as
    an example, if the shard moved from node1 to node2, and recorded epoch `{node2,
    10}` on the new node, then a sequence generated on node1 before the move, for
    example 12, would rewind down to 10 only when calculated on new location on
    node2, instead of being rewound all the way down to 0.
    
    [1] https://github.com/apache/couchdb/commit/e83935c7f8c3e47b47f07f22ece327f6529d4da0
---
 src/couch/src/couch_db.erl | 32 ++++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/src/couch/src/couch_db.erl b/src/couch/src/couch_db.erl
index 8837101..0646240 100644
--- a/src/couch/src/couch_db.erl
+++ b/src/couch/src/couch_db.erl
@@ -1503,17 +1503,20 @@ calculate_start_seq(Db, _Node, {Seq, {split, Uuid}, EpochNode}) ->
                 [?MODULE, Db#db.name, Seq, Uuid, EpochNode, get_epochs(Db)]),
             0
     end;
-calculate_start_seq(Db, _Node, {Seq, Uuid, EpochNode}) ->
+calculate_start_seq(Db, Node, {Seq, Uuid, EpochNode}) ->
     case is_prefix(Uuid, get_uuid(Db)) of
         true ->
             case is_owner(EpochNode, Seq, get_epochs(Db)) of
                 true -> Seq;
                 false ->
-                    couch_log:warning("~p calculate_start_seq not owner "
-                        "db: ~p, seq: ~p, uuid: ~p, epoch_node: ~p, epochs: ~p",
-                        [?MODULE, Db#db.name, Seq, Uuid, EpochNode,
-                            get_epochs(Db)]),
-                    0
+                    %% Shard might have been moved from another node. We
+                    %% matched the uuid already, try to find last viable
+                    %% sequence we can use
+                    couch_log:warning( "~p calculate_start_seq not owner, "
+                        " trying replacement db: ~p, seq: ~p, uuid: ~p, "
+                        "epoch_node: ~p, epochs: ~p", [?MODULE, Db#db.name,
+                        Seq, Uuid, EpochNode, get_epochs(Db)]),
+                    calculate_start_seq(Db, Node, {replace, EpochNode, Uuid, Seq})
             end;
         false ->
             couch_log:warning("~p calculate_start_seq uuid prefix mismatch "
@@ -1984,7 +1987,8 @@ calculate_start_seq_test_() ->
                 t_calculate_start_seq_is_owner(),
                 t_calculate_start_seq_not_owner(),
                 t_calculate_start_seq_raw(),
-                t_calculate_start_seq_epoch_mismatch()
+                t_calculate_start_seq_epoch_mismatch(),
+                t_calculate_start_seq_shard_move()
             ]
         }
     }.
@@ -2028,7 +2032,7 @@ t_calculate_start_seq_is_owner() ->
 t_calculate_start_seq_not_owner() ->
     ?_test(begin
         Db = test_util:fake_db([]),
-        Seq = calculate_start_seq(Db, node1, {15, <<"foo">>}),
+        Seq = calculate_start_seq(Db, node3, {15, <<"foo">>}),
         ?assertEqual(0, Seq)
     end).
 
@@ -2047,6 +2051,18 @@ t_calculate_start_seq_epoch_mismatch() ->
         ?assertEqual(0, Seq)
     end).
 
+t_calculate_start_seq_shard_move() ->
+    ?_test(begin
+        Db = test_util:fake_db([]),
+        % Sequence when shard was on node1
+        ?assertEqual(2, calculate_start_seq(Db, node1, {2, <<"foo">>})),
+        % Sequence from node1 after the move happened, we reset back to the
+        % start of the epoch on node2 = 10
+        ?assertEqual(10, calculate_start_seq(Db, node1, {16, <<"foo">>})),
+        % Invalid node, epoch mismatch, start at 0
+        ?assertEqual(0, calculate_start_seq(Db, node3, {16, <<"foo">>}))
+    end).
+
 is_owner_test() ->
     ?assertNot(is_owner(foo, 1, [])),
     ?assertNot(is_owner(foo, 1, [{foo, 1}])),