Posted to commits@couchdb.apache.org by va...@apache.org on 2023/06/30 19:57:39 UTC

[couchdb] branch undefined-in-mem3-rep updated (ad4a7dd02 -> d84a66a30)

This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a change to branch undefined-in-mem3-rep
in repository https://gitbox.apache.org/repos/asf/couchdb.git


 discard ad4a7dd02 Fix undefined range in mem3_rep purge replication logic
     new d84a66a30 Fix undefined range in mem3_rep purge replication logic

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (ad4a7dd02)
            \
             N -- N -- N   refs/heads/undefined-in-mem3-rep (d84a66a30)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 src/mem3/test/eunit/mem3_rep_test.erl | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)


[couchdb] 01/01: Fix undefined range in mem3_rep purge replication logic

Posted by va...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch undefined-in-mem3-rep
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit d84a66a3041b357fe8c18179469d5cab5a626f66
Author: Nick Vatamaniuc <va...@gmail.com>
AuthorDate: Fri Jun 30 15:35:00 2023 -0400

    Fix undefined range in mem3_rep purge replication logic
    
    When replication jobs are started with just shard names (paths), mem3_rep
    creates `#shard{}` records from those and fills in some fields like `name` and
    `node`. However, we left the `range` field as `undefined`. Until a recent fix
    to avoid duplicating document purges [1], we didn't care about ranges, so this
    was merely untidy rather than a real problem. Since that fix, we rely on having
    proper ranges and would get function or case clause errors when pulling and
    pushing purge infos.
    
    The fix is to use the `mem3:range/1` function to derive the range from the
    shard path when possible. In some cases we may replicate node-local databases,
    so we still have to account for the case where the range is `undefined`.
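    
    For example, a clustered shard path encodes its range in the
    00000000-7fffffff segment of the name, so one would expect roughly the
    following (illustrative shell session; the value matches the add_range/1
    test below):
    
        1> mem3:range(<<"shards/00000000-7fffffff/db.1687450595">>).
        [0, 2147483647]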
    
    In addition to the fix, improve test coverage by testing replication of purges
    and of local endpoints. Also, take the opportunity to clean up the
    mem3_rep_test suite to use the newer ?TDEF_FE macro.
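    
    For context, ?TDEF_FE(Name, Timeout) wraps a named test function into a
    fixture-compatible, timeout-wrapped eunit test; roughly (a sketch, see
    couch_eunit.hrl for the exact definition):
    
        fun(Arg) -> {atom_to_list(Name), {timeout, Timeout, ?_test(Name(Arg))}} end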
    
    [1] https://github.com/apache/couchdb/pull/4626
---
 src/mem3/src/mem3_rep.erl             |  50 +++++++-
 src/mem3/test/eunit/mem3_rep_test.erl | 229 ++++++++++++++++++++++------------
 2 files changed, 194 insertions(+), 85 deletions(-)

diff --git a/src/mem3/src/mem3_rep.erl b/src/mem3/src/mem3_rep.erl
index 3cc381494..b452fd2fa 100644
--- a/src/mem3/src/mem3_rep.erl
+++ b/src/mem3/src/mem3_rep.erl
@@ -59,7 +59,9 @@ go(Source, Target) ->
 
 go(DbName, Node, Opts) when is_binary(DbName), is_atom(Node) ->
     go(#shard{name = DbName, node = node()}, #shard{name = DbName, node = Node}, Opts);
-go(#shard{} = Source, #shard{} = Target, Opts) ->
+go(#shard{} = Source0, #shard{} = Target0, Opts) ->
+    Source = add_range(Source0),
+    Target = add_range(Target0),
     case mem3:db_is_current(Source) of
         true ->
             go(Source, targets_map(Source, Target), Opts);
@@ -366,7 +368,15 @@ pull_purges(Db, Count, #shard{} = SrcShard, #tgt{} = Tgt0, HashFun) ->
             % out using the same pickfun which we use when picking documents
             #shard{range = SrcRange} = SrcShard,
             BelongsFun = fun({_UUID, Id, _Revs}) when is_binary(Id) ->
-                mem3_reshard_job:pickfun(Id, [SrcRange], HashFun) =:= SrcRange
+                case SrcRange of
+                    [B, E] when is_integer(B), is_integer(E) ->
+                        mem3_reshard_job:pickfun(Id, [SrcRange], HashFun) =:= SrcRange;
+                    undefined ->
+                        % We may replicate node-local databases
+                        % which are not associated with a shard range. In that case
+                        % range will be undefined.
+                        true
+                end
             end,
             Infos1 = lists:filter(BelongsFun, Infos),
             {ok, _} = couch_db:purge_docs(Db, Infos1, [?REPLICATED_CHANGES]),
@@ -418,7 +428,14 @@ push_purges(Db, BatchSize, SrcShard, Tgt, HashFun) ->
                 erlang:max(0, Oldest - 1)
         end,
     BelongsFun = fun(Id) when is_binary(Id) ->
-        mem3_reshard_job:pickfun(Id, [TgtRange], HashFun) =:= TgtRange
+        case TgtRange of
+            [B, E] when is_integer(B), is_integer(E) ->
+                mem3_reshard_job:pickfun(Id, [TgtRange], HashFun) =:= TgtRange;
+            undefined ->
+                % We may replicate node-local databases which are not associated
+                % with a shard range. In that case range will be undefined.
+                true
+        end
     end,
     FoldFun = fun({PSeq, UUID, Id, Revs}, {Count, Infos, _}) ->
         case BelongsFun(Id) of
@@ -875,10 +892,37 @@ reset_remaining(#{} = Targets) ->
         Targets
     ).
 
+add_range(#shard{name = DbName} = Shard) when is_binary(DbName) ->
+    case DbName of
+        <<"shards/", _Start:8/binary, "-", _End:8/binary, "/", _/binary>> ->
+            Shard#shard{range = mem3:range(DbName)};
+        <<_/binary>> ->
+            % We may replicate local dbs which do not have a shard range.
+            Shard
+    end.
+
 -ifdef(TEST).
 
 -include_lib("couch/include/couch_eunit.hrl").
 
+name_node_to_shard_local_db_test() ->
+    DbName = <<"foo">>,
+    Node = 'n1@bar.net',
+    Shard = add_range(#shard{name = DbName, node = Node}),
+    ?assertMatch(#shard{}, Shard),
+    ?assertEqual(DbName, Shard#shard.name),
+    ?assertEqual(Node, Shard#shard.node),
+    ?assertEqual(undefined, Shard#shard.range).
+
+name_node_to_shard_local_shard_test() ->
+    DbName = <<"shards/00000000-7fffffff/db.1687450595">>,
+    Node = 'n2@baz.org',
+    Shard = add_range(#shard{name = DbName, node = Node}),
+    ?assertMatch(#shard{}, Shard),
+    ?assertEqual(DbName, Shard#shard.name),
+    ?assertEqual(Node, Shard#shard.node),
+    ?assertEqual([0, 2147483647], Shard#shard.range).
+
 find_source_seq_int_test_() ->
     {
         setup,
diff --git a/src/mem3/test/eunit/mem3_rep_test.erl b/src/mem3/test/eunit/mem3_rep_test.erl
index 31a6d9b77..470fb208d 100644
--- a/src/mem3/test/eunit/mem3_rep_test.erl
+++ b/src/mem3/test/eunit/mem3_rep_test.erl
@@ -24,15 +24,29 @@
 setup() ->
     {AllSrc, AllTgt} = {?tempdb(), ?tempdb()},
     {PartSrc, PartTgt} = {?tempdb(), ?tempdb()},
+    Localdb = ?tempdb(),
     create_db(AllSrc, [{q, 1}, {n, 1}]),
     create_db(AllTgt, [{q, 2}, {n, 1}]),
     PartProps = [{partitioned, true}, {hash, [couch_partition, hash, []]}],
     create_db(PartSrc, [{q, 1}, {n, 1}, {props, PartProps}]),
     create_db(PartTgt, [{q, 2}, {n, 1}, {props, PartProps}]),
-    #{allsrc => AllSrc, alltgt => AllTgt, partsrc => PartSrc, parttgt => PartTgt}.
+    create_local_db(Localdb),
+    #{
+        allsrc => AllSrc,
+        alltgt => AllTgt,
+        partsrc => PartSrc,
+        parttgt => PartTgt,
+        localdb => Localdb
+    }.
 
 teardown(#{} = Dbs) ->
-    maps:map(fun(_, Db) -> delete_db(Db) end, Dbs).
+    maps:map(
+        fun
+            (localdb, Db) -> delete_local_db(Db);
+            (_, Db) -> delete_db(Db)
+        end,
+        Dbs
+    ).
 
 start_couch() ->
     test_util:start_couch([mem3, fabric]).
@@ -52,102 +66,133 @@ mem3_reshard_db_test_() ->
                 fun setup/0,
                 fun teardown/1,
                 [
-                    fun replicate_basics/1,
-                    fun replicate_small_batches/1,
-                    fun replicate_low_batch_count/1,
-                    fun replicate_with_partitions/1
+                    ?TDEF_FE(replicate_basics, ?TIMEOUT),
+                    ?TDEF_FE(replicate_small_batches, ?TIMEOUT),
+                    ?TDEF_FE(replicate_low_batch_count, ?TIMEOUT),
+                    ?TDEF_FE(replicate_with_partitions, ?TIMEOUT),
+                    ?TDEF_FE(replicate_to_and_from_local, ?TIMEOUT),
+                    ?TDEF_FE(replicate_with_purges, ?TIMEOUT)
                 ]
             }
         }
     }.
 
 replicate_basics(#{allsrc := AllSrc, alltgt := AllTgt}) ->
-    {timeout, ?TIMEOUT,
-        ?_test(begin
-            DocSpec = #{docs => 10, delete => [5, 9]},
-            add_test_docs(AllSrc, DocSpec),
-            SDocs = get_all_docs(AllSrc),
-
-            [Src] = lists:sort(mem3:local_shards(AllSrc)),
-            [Tgt1, Tgt2] = lists:sort(mem3:local_shards(AllTgt)),
-            #shard{range = R1} = Tgt1,
-            #shard{range = R2} = Tgt2,
-            TMap = #{R1 => Tgt1, R2 => Tgt2},
-            Opts = [{batch_size, 1000}, {batch_count, all}],
-            ?assertMatch({ok, 0}, mem3_rep:go(Src, TMap, Opts)),
-
-            ?assertEqual(SDocs, get_all_docs(AllTgt))
-        end)}.
+    DocSpec = #{docs => 10, delete => [5, 9]},
+    add_test_docs(AllSrc, DocSpec),
+    SDocs = get_all_docs(AllSrc),
+
+    [Src] = lists:sort(mem3:local_shards(AllSrc)),
+    [Tgt1, Tgt2] = lists:sort(mem3:local_shards(AllTgt)),
+    #shard{range = R1} = Tgt1,
+    #shard{range = R2} = Tgt2,
+    TMap = #{R1 => Tgt1, R2 => Tgt2},
+    Opts = [{batch_size, 1000}, {batch_count, all}],
+    ?assertMatch({ok, 0}, mem3_rep:go(Src, TMap, Opts)),
+
+    ?assertEqual(SDocs, get_all_docs(AllTgt)).
 
 replicate_small_batches(#{allsrc := AllSrc, alltgt := AllTgt}) ->
-    {timeout, ?TIMEOUT,
-        ?_test(begin
-            DocSpec = #{docs => 10, delete => [5, 9]},
-            add_test_docs(AllSrc, DocSpec),
-            SDocs = get_all_docs(AllSrc),
-
-            [Src] = lists:sort(mem3:local_shards(AllSrc)),
-            [Tgt1, Tgt2] = lists:sort(mem3:local_shards(AllTgt)),
-            #shard{range = R1} = Tgt1,
-            #shard{range = R2} = Tgt2,
-            TMap = #{R1 => Tgt1, R2 => Tgt2},
-            Opts = [{batch_size, 2}, {batch_count, all}],
-            ?assertMatch({ok, 0}, mem3_rep:go(Src, TMap, Opts)),
-
-            ?assertEqual(SDocs, get_all_docs(AllTgt))
-        end)}.
+    DocSpec = #{docs => 10, delete => [5, 9]},
+    add_test_docs(AllSrc, DocSpec),
+    SDocs = get_all_docs(AllSrc),
+
+    [Src] = lists:sort(mem3:local_shards(AllSrc)),
+    [Tgt1, Tgt2] = lists:sort(mem3:local_shards(AllTgt)),
+    #shard{range = R1} = Tgt1,
+    #shard{range = R2} = Tgt2,
+    TMap = #{R1 => Tgt1, R2 => Tgt2},
+    Opts = [{batch_size, 2}, {batch_count, all}],
+    ?assertMatch({ok, 0}, mem3_rep:go(Src, TMap, Opts)),
+
+    ?assertEqual(SDocs, get_all_docs(AllTgt)).
 
 replicate_low_batch_count(#{allsrc := AllSrc, alltgt := AllTgt}) ->
-    {timeout, ?TIMEOUT,
-        ?_test(begin
-            DocSpec = #{docs => 10, delete => [5, 9]},
-            add_test_docs(AllSrc, DocSpec),
-            SDocs = get_all_docs(AllSrc),
+    DocSpec = #{docs => 10, delete => [5, 9]},
+    add_test_docs(AllSrc, DocSpec),
+    SDocs = get_all_docs(AllSrc),
 
-            [Src] = lists:sort(mem3:local_shards(AllSrc)),
-            [Tgt1, Tgt2] = lists:sort(mem3:local_shards(AllTgt)),
-            #shard{range = R1} = Tgt1,
-            #shard{range = R2} = Tgt2,
-            TMap = #{R1 => Tgt1, R2 => Tgt2},
+    [Src] = lists:sort(mem3:local_shards(AllSrc)),
+    [Tgt1, Tgt2] = lists:sort(mem3:local_shards(AllTgt)),
+    #shard{range = R1} = Tgt1,
+    #shard{range = R2} = Tgt2,
+    TMap = #{R1 => Tgt1, R2 => Tgt2},
 
-            Opts1 = [{batch_size, 2}, {batch_count, 1}],
-            ?assertMatch({ok, 8}, mem3_rep:go(Src, TMap, Opts1)),
+    Opts1 = [{batch_size, 2}, {batch_count, 1}],
+    ?assertMatch({ok, 8}, mem3_rep:go(Src, TMap, Opts1)),
 
-            Opts2 = [{batch_size, 1}, {batch_count, 2}],
-            ?assertMatch({ok, 6}, mem3_rep:go(Src, TMap, Opts2)),
+    Opts2 = [{batch_size, 1}, {batch_count, 2}],
+    ?assertMatch({ok, 6}, mem3_rep:go(Src, TMap, Opts2)),
 
-            Opts3 = [{batch_size, 1000}, {batch_count, all}],
-            ?assertMatch({ok, 0}, mem3_rep:go(Src, TMap, Opts3)),
+    Opts3 = [{batch_size, 1000}, {batch_count, all}],
+    ?assertMatch({ok, 0}, mem3_rep:go(Src, TMap, Opts3)),
 
-            ?assertEqual(SDocs, get_all_docs(AllTgt))
-        end)}.
+    ?assertEqual(SDocs, get_all_docs(AllTgt)).
 
 replicate_with_partitions(#{partsrc := PartSrc, parttgt := PartTgt}) ->
-    {timeout, ?TIMEOUT,
-        ?_test(begin
-            DocSpec = #{
-                pdocs => #{
-                    <<"PX">> => 15,
-                    <<"PY">> => 19
-                }
-            },
-            add_test_docs(PartSrc, DocSpec),
-            SDocs = get_all_docs(PartSrc),
-            PXSrc = get_partition_info(PartSrc, <<"PX">>),
-            PYSrc = get_partition_info(PartSrc, <<"PY">>),
-
-            [Src] = lists:sort(mem3:local_shards(PartSrc)),
-            [Tgt1, Tgt2] = lists:sort(mem3:local_shards(PartTgt)),
-            #shard{range = R1} = Tgt1,
-            #shard{range = R2} = Tgt2,
-            TMap = #{R1 => Tgt1, R2 => Tgt2},
-            Opts = [{batch_size, 1000}, {batch_count, all}],
-            ?assertMatch({ok, 0}, mem3_rep:go(Src, TMap, Opts)),
-
-            ?assertEqual(PXSrc, get_partition_info(PartTgt, <<"PX">>)),
-            ?assertEqual(PYSrc, get_partition_info(PartTgt, <<"PY">>)),
-            ?assertEqual(SDocs, get_all_docs(PartTgt))
-        end)}.
+    DocSpec = #{
+        pdocs => #{
+            <<"PX">> => 15,
+            <<"PY">> => 19
+        }
+    },
+    add_test_docs(PartSrc, DocSpec),
+    SDocs = get_all_docs(PartSrc),
+    PXSrc = get_partition_info(PartSrc, <<"PX">>),
+    PYSrc = get_partition_info(PartSrc, <<"PY">>),
+
+    [Src] = lists:sort(mem3:local_shards(PartSrc)),
+    [Tgt1, Tgt2] = lists:sort(mem3:local_shards(PartTgt)),
+    #shard{range = R1} = Tgt1,
+    #shard{range = R2} = Tgt2,
+    TMap = #{R1 => Tgt1, R2 => Tgt2},
+    Opts = [{batch_size, 1000}, {batch_count, all}],
+    ?assertMatch({ok, 0}, mem3_rep:go(Src, TMap, Opts)),
+
+    ?assertEqual(PXSrc, get_partition_info(PartTgt, <<"PX">>)),
+    ?assertEqual(PYSrc, get_partition_info(PartTgt, <<"PY">>)),
+    ?assertEqual(SDocs, get_all_docs(PartTgt)).
+
+replicate_with_purges(#{allsrc := AllSrc, alltgt := AllTgt}) ->
+    DocSpec = #{docs => 10, delete => [5, 9], purge => [2, 4]},
+    add_test_docs(AllSrc, DocSpec),
+    % Add and purge some docs on target to exercise the pull_purges code path
+    add_test_docs(AllTgt, #{docs => 3, purge => [0, 2]}),
+
+    [Src] = lists:sort(mem3:local_shards(AllSrc)),
+    [Tgt1, Tgt2] = lists:sort(mem3:local_shards(AllTgt)),
+    #shard{range = R1} = Tgt1,
+    #shard{range = R2} = Tgt2,
+    TMap = #{R1 => Tgt1, R2 => Tgt2},
+    Opts = [{batch_size, 1000}, {batch_count, all}],
+    ?assertMatch({ok, 0}, mem3_rep:go(Src, TMap, Opts)),
+
+    SDocs = get_all_docs(AllSrc),
+    % Purges from the target should have been pulled, removing docs 0, 1 and 2.
+    % The source should have no live docs left.
+    ?assertEqual(#{}, SDocs),
+    ?assertEqual(#{}, get_all_docs(AllTgt)).
+
+replicate_to_and_from_local(#{localdb := LocalDb, allsrc := ClusteredDb}) ->
+    % We'll just test that we can pull purges from the target
+    add_test_docs(ClusteredDb, #{docs => 6, purge => [0, 2]}),
+
+    [#shard{name = TgtDbName}] = mem3:local_shards(ClusteredDb),
+    Opts = [{batch_size, 1000}, {batch_count, all}],
+    Src1 = #shard{name = LocalDb, node = node()},
+    Tgt1 = #shard{name = TgtDbName, node = node()},
+    ?assertMatch({ok, 0}, mem3_rep:go(Src1, Tgt1, Opts)),
+
+    % Purge a few more docs in clustered db
+    add_test_docs(ClusteredDb, #{purge => [3, 4]}),
+
+    % Replicate the other way: from the clustered db back to the local db
+    Src2 = #shard{name = TgtDbName, node = node()},
+    Tgt2 = #shard{name = LocalDb, node = node()},
+    ?assertMatch({ok, 0}, mem3_rep:go(Src2, Tgt2, Opts)),
+    SDocs = get_all_docs(ClusteredDb),
+    ?assertEqual(1, map_size(SDocs)),
+    ?assertMatch(#{<<"00005">> := #{}}, SDocs).
 
 get_partition_info(DbName, Partition) ->
     with_proc(fun() ->
@@ -197,6 +242,13 @@ delete_db(DbName) ->
     GL = erlang:group_leader(),
     with_proc(fun() -> fabric:delete_db(DbName, [?ADMIN_CTX]) end, GL).
 
+create_local_db(DbName) ->
+    {ok, _} = couch_server:create(DbName, []),
+    ok.
+
+delete_local_db(DbName) ->
+    couch_server:delete(DbName, []).
+
 with_proc(Fun) ->
     with_proc(Fun, undefined, 30000).
 
@@ -237,7 +289,7 @@ add_test_docs(DbName, #{} = DocSpec) ->
         [] -> ok;
         [_ | _] = Deleted -> update_docs(DbName, Deleted)
     end,
-    ok.
+    purge_docs(DbName, maps:get(purge, DocSpec, [])).
 
 update_docs(DbName, Docs) ->
     with_proc(fun() ->
@@ -261,6 +313,19 @@ delete_docs([S, E], Docs) when E >= S ->
 delete_docs(_, _) ->
     [].
 
+purge_docs(DbName, [S, E]) when E >= S ->
+    Ids = [doc_id(<<"">>, I) || I <- lists:seq(S, E)],
+    IdRevs = [{Id, get_revs(DbName, Id)} || Id <- Ids],
+    {ok, _} = fabric:purge_docs(DbName, IdRevs, []),
+    ok;
+purge_docs(_DbName, []) ->
+    ok.
+
+get_revs(DbName, DocId) ->
+    FDI = fabric:get_full_doc_info(DbName, DocId, []),
+    #doc_info{revs = Revs} = couch_doc:to_doc_info(FDI),
+    [Rev#rev_info.rev || Rev <- Revs].
+
 pdocs(#{} = PMap) ->
     maps:fold(
         fun(Part, DocSpec, DocsAcc) ->