You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by va...@apache.org on 2023/01/12 04:55:06 UTC

[couchdb] 01/01: Ensure design docs are uploaded individually when replicating with _bulk_get

This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch fix-invididual-design-docs-replication
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit b5b3ab9db4223caab81a780372111104d9109507
Author: Nick Vatamaniuc <va...@gmail.com>
AuthorDate: Wed Jan 11 23:51:05 2023 -0500

    Ensure design docs are uploaded individually when replicating with _bulk_get
    
    Previously, when replication jobs used _bulk_get, they didn't upload design
    docs individually like they do when not using _bulk_get.
    
    Add tests to cover both the attachment and ddoc cases. meck:num_calls/3 is helpful
    as it makes it easy to assert which API function was called and how many times.
---
 .../src/couch_replicator_worker.erl                |  7 +-
 .../test/eunit/couch_replicator_bulk_get_tests.erl | 79 +++++++++++++++++++++-
 2 files changed, 82 insertions(+), 4 deletions(-)

diff --git a/src/couch_replicator/src/couch_replicator_worker.erl b/src/couch_replicator/src/couch_replicator_worker.erl
index d8f872388..c0ac23a46 100644
--- a/src/couch_replicator/src/couch_replicator_worker.erl
+++ b/src/couch_replicator/src/couch_replicator_worker.erl
@@ -298,12 +298,12 @@ queue_fetch_loop(#fetch_st{} = St) ->
             % Find missing revisions (POST to _revs_diff)
             {IdRevs, RdSt1} = find_missing(Changes, Target, Parent, RdSt),
             {Docs, BgSt1} = bulk_get(UseBulkGet, Source, IdRevs, Parent, BgSt),
-            % Documents without attachments can be uploaded right away
+            % Batch-upload docs that are neither design docs nor have attachments
             BatchFun = fun({_, #doc{} = Doc}) ->
                 ok = gen_server:call(Parent, {batch_doc, Doc}, infinity)
             end,
             lists:foreach(BatchFun, lists:sort(maps:to_list(Docs))),
-            % Fetch individually if _bulk_get failed or there are attachments
+            % Individually fetch design docs and docs with attachments
             FetchFun = fun({Id, Rev}, PAs) ->
                 ok = gen_server:call(Parent, {fetch_doc, {Id, [Rev], PAs}}, infinity)
             end,
@@ -349,8 +349,9 @@ bulk_get(#httpdb{} = Source, #{} = IdRevs) ->
     case couch_replicator_api_wrap:bulk_get(Source, IdRevs, Opts) of
         {ok, #{} = Docs} ->
             FilterFun = fun
-                (_, #doc{atts = []}) -> true;
+                (_, #doc{id = <<?DESIGN_DOC_PREFIX, _/binary>>}) -> false;
                 (_, #doc{atts = [_ | _]}) -> false;
+                (_, #doc{atts = []}) -> true;
                 (_, {error, _}) -> false
             end,
             maps:filter(FilterFun, Docs);
diff --git a/src/couch_replicator/test/eunit/couch_replicator_bulk_get_tests.erl b/src/couch_replicator/test/eunit/couch_replicator_bulk_get_tests.erl
index 2ecd0f4ee..f0d9569db 100644
--- a/src/couch_replicator/test/eunit/couch_replicator_bulk_get_tests.erl
+++ b/src/couch_replicator/test/eunit/couch_replicator_bulk_get_tests.erl
@@ -26,7 +26,11 @@ bulk_get_test_() ->
             fun couch_replicator_test_helper:test_teardown/1,
             [
                 ?TDEF_FE(use_bulk_get),
+                ?TDEF_FE(use_bulk_get_with_ddocs),
+                ?TDEF_FE(use_bulk_get_with_attachments),
                 ?TDEF_FE(dont_use_bulk_get),
+                ?TDEF_FE(dont_use_bulk_get_ddocs),
+                ?TDEF_FE(dont_use_bulk_get_attachments),
                 ?TDEF_FE(job_enable_overrides_global_disable),
                 ?TDEF_FE(global_disable_works)
             ]
@@ -39,7 +43,33 @@ use_bulk_get({_Ctx, {Source, Target}}) ->
     replicate(Source, Target, true),
     BulkGets = meck:num_calls(couch_replicator_api_wrap, bulk_get, 3),
     JustGets = meck:num_calls(couch_replicator_api_wrap, open_doc_revs, 6),
+    DocUpdates = meck:num_calls(couch_replicator_api_wrap, update_doc, 4),
     ?assertEqual(0, JustGets),
+    ?assertEqual(0, DocUpdates),
+    ?assert(BulkGets >= 1),
+    compare_dbs(Source, Target).
+
+use_bulk_get_with_ddocs({_Ctx, {Source, Target}}) ->
+    populate_db_ddocs(Source, ?DOC_COUNT),
+    meck:new(couch_replicator_api_wrap, [passthrough]),
+    replicate(Source, Target, true),
+    BulkGets = meck:num_calls(couch_replicator_api_wrap, bulk_get, 3),
+    JustGets = meck:num_calls(couch_replicator_api_wrap, open_doc_revs, 6),
+    DocUpdates = meck:num_calls(couch_replicator_api_wrap, update_doc, 4),
+    ?assertEqual(?DOC_COUNT, JustGets),
+    ?assertEqual(?DOC_COUNT, DocUpdates),
+    ?assert(BulkGets >= 1),
+    compare_dbs(Source, Target).
+
+use_bulk_get_with_attachments({_Ctx, {Source, Target}}) ->
+    populate_db_atts(Source, ?DOC_COUNT),
+    meck:new(couch_replicator_api_wrap, [passthrough]),
+    replicate(Source, Target, true),
+    BulkGets = meck:num_calls(couch_replicator_api_wrap, bulk_get, 3),
+    JustGets = meck:num_calls(couch_replicator_api_wrap, open_doc_revs, 6),
+    DocUpdates = meck:num_calls(couch_replicator_api_wrap, update_doc, 4),
+    ?assertEqual(?DOC_COUNT, JustGets),
+    ?assertEqual(?DOC_COUNT, DocUpdates),
     ?assert(BulkGets >= 1),
     compare_dbs(Source, Target).
 
@@ -49,10 +79,36 @@ dont_use_bulk_get({_Ctx, {Source, Target}}) ->
     replicate(Source, Target, false),
     BulkGets = meck:num_calls(couch_replicator_api_wrap, bulk_get, 3),
     JustGets = meck:num_calls(couch_replicator_api_wrap, open_doc_revs, 6),
+    DocUpdates = meck:num_calls(couch_replicator_api_wrap, update_doc, 4),
     ?assertEqual(0, BulkGets),
+    ?assertEqual(0, DocUpdates),
     ?assertEqual(?DOC_COUNT, JustGets),
     compare_dbs(Source, Target).
 
+dont_use_bulk_get_ddocs({_Ctx, {Source, Target}}) ->
+    populate_db_ddocs(Source, ?DOC_COUNT),
+    meck:new(couch_replicator_api_wrap, [passthrough]),
+    replicate(Source, Target, false),
+    BulkGets = meck:num_calls(couch_replicator_api_wrap, bulk_get, 3),
+    JustGets = meck:num_calls(couch_replicator_api_wrap, open_doc_revs, 6),
+    DocUpdates = meck:num_calls(couch_replicator_api_wrap, update_doc, 4),
+    ?assertEqual(0, BulkGets),
+    ?assertEqual(?DOC_COUNT, JustGets),
+    ?assertEqual(?DOC_COUNT, DocUpdates),
+    compare_dbs(Source, Target).
+
+dont_use_bulk_get_attachments({_Ctx, {Source, Target}}) ->
+    populate_db_atts(Source, ?DOC_COUNT),
+    meck:new(couch_replicator_api_wrap, [passthrough]),
+    replicate(Source, Target, false),
+    BulkGets = meck:num_calls(couch_replicator_api_wrap, bulk_get, 3),
+    JustGets = meck:num_calls(couch_replicator_api_wrap, open_doc_revs, 6),
+    DocUpdates = meck:num_calls(couch_replicator_api_wrap, update_doc, 4),
+    ?assertEqual(0, BulkGets),
+    ?assertEqual(?DOC_COUNT, JustGets),
+    ?assertEqual(?DOC_COUNT, DocUpdates),
+    compare_dbs(Source, Target).
+
 job_enable_overrides_global_disable({_Ctx, {Source, Target}}) ->
     populate_db(Source, ?DOC_COUNT),
     Persist = false,
@@ -78,10 +134,31 @@ global_disable_works({_Ctx, {Source, Target}}) ->
     compare_dbs(Source, Target).
 
 populate_db(DbName, DocCount) ->
-    Fun = fun(Id, Acc) -> [#doc{id = integer_to_binary(Id)} | Acc] end,
+    IdFun = fun(Id) -> integer_to_binary(Id) end,
+    Fun = fun(Id, Acc) -> [#doc{id = IdFun(Id)} | Acc] end,
+    Docs = lists:foldl(Fun, [], lists:seq(1, DocCount)),
+    {ok, _} = fabric:update_docs(DbName, Docs, [?ADMIN_CTX]).
+
+populate_db_ddocs(DbName, DocCount) ->
+    IdFun = fun(Id) -> <<"_design/", (integer_to_binary(Id))/binary>> end,
+    Fun = fun(Id, Acc) -> [#doc{id = IdFun(Id)} | Acc] end,
     Docs = lists:foldl(Fun, [], lists:seq(1, DocCount)),
     {ok, _} = fabric:update_docs(DbName, Docs, [?ADMIN_CTX]).
 
+populate_db_atts(DbName, DocCount) ->
+    IdFun = fun(Id) -> integer_to_binary(Id) end,
+    Fun = fun(Id, Acc) -> [#doc{id = IdFun(Id), atts = [att(<<"a">>)]} | Acc] end,
+    Docs = lists:foldl(Fun, [], lists:seq(1, DocCount)),
+    {ok, _} = fabric:update_docs(DbName, Docs, [?ADMIN_CTX]).
+
+att(Name) when is_binary(Name) ->
+    couch_att:new([
+        {name, Name},
+        {att_len, 1},
+        {type, <<"app/binary">>},
+        {data, <<"x">>}
+    ]).
+
 compare_dbs(Source, Target) ->
     couch_replicator_test_helper:cluster_compare_dbs(Source, Target).