You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by va...@apache.org on 2022/09/26 20:06:15 UTC

[couchdb] branch main updated: Expose the batched open_doc_revs function variant.

This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/couchdb.git


The following commit(s) were added to refs/heads/main by this push:
     new 6a20b5e9d Expose the batched open_doc_revs function variant.
6a20b5e9d is described below

commit 6a20b5e9d350264a55655b39e19fae4923e0bdf3
Author: Nick Vatamaniuc <va...@gmail.com>
AuthorDate: Wed Sep 21 23:57:14 2022 -0400

    Expose the batched open_doc_revs function variant.
    
    This is the batched version of the existing `open_doc_revs/4` function. It
    turns out the internal implementation of `open_doc_revs/4` already operated on
    batches, just that the batch size had always been 1, so here we just expose the
    batched variant.
    
    One subtle difference from the single variant is that some options apply to
    the whole request, while others apply to individual documents only. In the
    previous `open_doc_revs/4` both were intermingled in the single Options
    argument. In the batched version, the per-document options are passed in each
    `{{Id, Revs}, DocOpts}` tuple. Here DocOpts will usually have an `{atts_since,
    ...}` option, but it could have others, like `ejson_body`, for example.
    
    The API is also exposed in `fabric_rpc`. This function will be used to
    optimize `_bulk_get` as described in issue 4183. To allow incremental node
    updates, let's have the backend implementation in a separate commit so it can
    be included in separate rolling update releases.
    
    Add some tests for both the original `open_doc_revs/4` function and the batched
    variant. Since the internal implementation is exactly the same, the batched
    variant mainly checks how multiple results are returned and that per-document
    options are applied correctly.
    
    Issue: https://github.com/apache/couchdb/issues/4183
---
 src/couch/src/couch_db.erl                         |  22 ++-
 .../test/eunit/couchdb_open_doc_revs_tests.erl     | 220 +++++++++++++++++++++
 src/fabric/src/fabric_rpc.erl                      |   5 +-
 3 files changed, 244 insertions(+), 3 deletions(-)

diff --git a/src/couch/src/couch_db.erl b/src/couch/src/couch_db.erl
index dd7e07517..969b93636 100644
--- a/src/couch/src/couch_db.erl
+++ b/src/couch/src/couch_db.erl
@@ -72,6 +72,7 @@
 
     open_doc/2,
     open_doc/3,
+    open_doc_revs/3,
     open_doc_revs/4,
     open_doc_int/3,
     get_doc_info/2,
@@ -347,6 +348,18 @@ find_ancestor_rev_pos({RevPos, [RevId | Rest]}, AttsSinceRevs) ->
             find_ancestor_rev_pos({RevPos - 1, Rest}, AttsSinceRevs)
     end.
 
+open_doc_revs(Db, IdRevsOpts, Options) when is_list(IdRevsOpts) ->
+    increment_stat(Db, [couchdb, database_reads], length(IdRevsOpts)),
+    % IdRevsOpts looks like [{{Id, Revs}, DocOpts}, ...]
+    {IdRevs, DocOptsOnly} = lists:unzip(IdRevsOpts),
+    % Function open_doc_revs_int takes [{Id, Revs},...] as its argument
+    AllResults = open_doc_revs_int(Db, IdRevs, Options),
+    % Apply document open options like {atts_since, ...} etc
+    ResultsZipFun = fun(DocOpts, {ok, Results}) ->
+        [apply_open_options(R, DocOpts) || R <- Results]
+    end,
+    lists:zipwith(ResultsZipFun, DocOptsOnly, AllResults).
+
 open_doc_revs(Db, Id, Revs, Options) ->
     increment_stat(Db, [couchdb, database_reads]),
     [{ok, Results}] = open_doc_revs_int(Db, [{Id, Revs}], Options),
@@ -1985,12 +1998,17 @@ after_doc_read(#db{} = Db, Doc) ->
     DocWithBody = couch_doc:with_ejson_body(Doc),
     couch_db_plugin:after_doc_read(Db, DocWithBody).
 
-increment_stat(#db{options = Options}, Stat) ->
+increment_stat(#db{} = Db, Stat) ->
+    increment_stat(Db, Stat, 1).
+
+increment_stat(#db{options = Options}, Stat, Count) when
+    is_integer(Count), Count >= 0
+->
     case lists:member(sys_db, Options) of
         true ->
             ok;
         false ->
-            couch_stats:increment_counter(Stat)
+            couch_stats:increment_counter(Stat, Count)
     end.
 
 -spec normalize_dbname(list() | binary()) -> binary().
diff --git a/src/couch/test/eunit/couchdb_open_doc_revs_tests.erl b/src/couch/test/eunit/couchdb_open_doc_revs_tests.erl
new file mode 100644
index 000000000..78d9c3182
--- /dev/null
+++ b/src/couch/test/eunit/couchdb_open_doc_revs_tests.erl
@@ -0,0 +1,220 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couchdb_open_doc_revs_tests).
+
+-include_lib("couch/include/couch_eunit.hrl").
+-include_lib("couch/include/couch_db.hrl").
+
+-define(TDEF_FE(Name), fun(Arg) -> {atom_to_list(Name), ?_test(Name(Arg))} end).
+
+open_doc_revs_test_() ->
+    {
+        foreach,
+        fun setup/0,
+        fun teardown/1,
+        [
+            ?TDEF_FE(open_doc_revs_single),
+            ?TDEF_FE(open_doc_revs_batch)
+        ]
+    }.
+
+setup() ->
+    Ctx = test_util:start_couch(),
+    DbName = ?tempdb(),
+    {ok, Db} = couch_db:create(DbName, [?ADMIN_CTX]),
+    Docs = [
+        doc(<<"1">>, [<<"z">>]),
+        doc(<<"1">>, [<<"x">>, <<"z">>]),
+        doc(<<"1">>, [<<"y">>, <<"z">>]),
+        doc(<<"1">>, [<<"w">>, <<"y">>, <<"z">>]),
+
+        doc(<<"2">>, [<<"m">>]),
+        doc(<<"2">>, [<<"l">>, <<"k">>], true)
+    ],
+    {ok, []} = couch_db:update_docs(Db, Docs, [], ?REPLICATED_CHANGES),
+    ok = couch_db:close(Db),
+    {Ctx, DbName}.
+
+teardown({Ctx, DbName}) ->
+    ok = couch_server:delete(DbName, [?ADMIN_CTX]),
+    test_util:stop_couch(Ctx).
+
+open_doc_revs_single({_, DbName}) ->
+    {ok, Db} = couch_db:open_int(DbName, []),
+
+    % Empty revs list
+    ?assertEqual({ok, []}, couch_db:open_doc_revs(Db, <<"1">>, [], [])),
+
+    % Missing rev
+    ?assertMatch(
+        {ok, [{{not_found, missing}, _}]},
+        couch_db:open_doc_revs(Db, <<"1">>, [{1, <<"foo">>}], [])
+    ),
+
+    % Missing doc
+    ?assertMatch(
+        {ok, [{{not_found, missing}, _}]},
+        couch_db:open_doc_revs(Db, <<"foo">>, [{1, <<"bar">>}], [])
+    ),
+
+    % All revs option
+    {ok, Res1} = couch_db:open_doc_revs(Db, <<"1">>, all, []),
+    ?assertMatch(
+        [
+            {ok, #doc{revs = {2, [<<"x">>, <<"z">>]}}},
+            {ok, #doc{revs = {3, [<<"w">>, <<"y">>, <<"z">>]}}}
+        ],
+        Res1
+    ),
+
+    % Basic revs fetch
+    {ok, Res2} = couch_db:open_doc_revs(Db, <<"1">>, [{2, <<"x">>}], []),
+    ?assertMatch([{ok, #doc{revs = {2, [<<"x">>, <<"z">>]}}}], Res2),
+
+    % Fetch a revision that's not the latest
+    {ok, Res3} = couch_db:open_doc_revs(Db, <<"1">>, [{1, <<"z">>}], []),
+    ?assertMatch([{ok, #doc{revs = {1, [<<"z">>]}}}], Res3),
+
+    % Force fetching the latest revisions. Since it's a conflict we get 2
+    % revisions back. They are both "latest".
+    {ok, Res4} = couch_db:open_doc_revs(Db, <<"1">>, [{1, <<"z">>}], [latest]),
+    ?assertMatch(
+        [
+            {ok, #doc{revs = {2, [<<"x">>, <<"z">>]}}},
+            {ok, #doc{revs = {3, [<<"w">>, <<"y">>, <<"z">>]}}}
+        ],
+        Res4
+    ),
+
+    % Latest option with rev from a single branch so we get only one
+    % result back.
+    {ok, Res5} = couch_db:open_doc_revs(Db, <<"1">>, [{2, <<"y">>}], [latest]),
+    ?assertMatch(
+        [
+            {ok, #doc{revs = {3, [<<"w">>, <<"y">>, <<"z">>]}}}
+        ],
+        Res5
+    ),
+
+    % Make sure deleted revisions are also returned
+    {ok, Res6} = couch_db:open_doc_revs(Db, <<"2">>, all, []),
+    ?assertMatch(
+        [
+            {ok, #doc{revs = {1, [<<"m">>]}}},
+            {ok, #doc{revs = {2, [<<"l">>, <<"k">>]}, deleted = true}}
+        ],
+        Res6
+    ).
+
+open_doc_revs_batch({_, DbName}) ->
+    {ok, Db} = couch_db:open_int(DbName, []),
+
+    % Empty batch
+    ?assertEqual([], couch_db:open_doc_revs(Db, [], [])),
+
+    % One doc, empty list of revisions
+    ?assertEqual(
+        [[]],
+        couch_db:open_doc_revs(
+            Db,
+            [
+                {{<<"1">>, []}, []}
+            ],
+            []
+        )
+    ),
+
+    % Multiple results. Some found, some not found
+    ?assertMatch(
+        [
+            [
+                {ok, #doc{revs = {2, [<<"x">>, <<"z">>]}}},
+                {ok, #doc{revs = {3, [<<"w">>, <<"y">>, <<"z">>]}}}
+            ],
+            [
+                {{not_found, _}, _}
+            ],
+            [
+                {{not_found, _}, _}
+            ]
+        ],
+        couch_db:open_doc_revs(
+            Db,
+            [
+                {{<<"1">>, all}, []},
+                {{<<"1">>, [{1, <<"foo">>}]}, []},
+                {{<<"foo">>, [{1, <<"bar">>}]}, []}
+            ],
+            []
+        )
+    ),
+
+    % Fetch the exact same doc and revisions
+    ?assertMatch(
+        [
+            [
+                {ok, #doc{revs = {2, [<<"y">>, <<"z">>]}}}
+            ],
+            [
+                {ok, #doc{revs = {2, [<<"y">>, <<"z">>]}}}
+            ]
+        ],
+        couch_db:open_doc_revs(
+            Db,
+            [
+                {{<<"1">>, [{2, <<"y">>}]}, []},
+                {{<<"1">>, [{2, <<"y">>}]}, []}
+            ],
+            []
+        )
+    ),
+
+    % Make sure individual doc options are applied
+    [[{ok, Doc1}], [{ok, Doc2}]] = couch_db:open_doc_revs(
+        Db,
+        [
+            {{<<"1">>, [{2, <<"y">>}]}, [{atts_since, [{2, <<"y">>}]}]},
+            {{<<"1">>, [{2, <<"y">>}]}, []}
+        ],
+        []
+    ),
+    [Att1] = Doc1#doc.atts,
+    [Att2] = Doc2#doc.atts,
+
+    % Only attachments since revision > 2-y will be included, which means for
+    % revisions =< 2-y we'll get a stub only.
+    ?assertEqual(stub, couch_att:fetch(data, Att1)),
+
+    % atts_since shouldn't be applied to the second doc so the stream open
+    % handle to read attachment data should be included.
+    ?assertMatch({stream, _}, couch_att:fetch(data, Att2)).
+
+doc(Id, Revs) ->
+    doc(Id, Revs, false).
+
+doc(Id, Revs, Deleted) ->
+    #doc{
+        id = Id,
+        revs = {length(Revs), Revs},
+        deleted = Deleted,
+        body = {[{<<"data">>, 42}]},
+        atts = [att(<<"att1">>)]
+    }.
+
+att(Name) when is_binary(Name) ->
+    couch_att:new([
+        {name, Name},
+        {att_len, 1},
+        {type, <<"app/binary">>},
+        {data, <<"x">>}
+    ]).
diff --git a/src/fabric/src/fabric_rpc.erl b/src/fabric/src/fabric_rpc.erl
index eb909519b..8b9c94c87 100644
--- a/src/fabric/src/fabric_rpc.erl
+++ b/src/fabric/src/fabric_rpc.erl
@@ -20,7 +20,7 @@
 ]).
 -export([
     open_doc/3,
-    open_revs/4,
+    open_revs/3, open_revs/4,
     get_doc_info/3,
     get_full_doc_info/3,
     get_missing_revs/2, get_missing_revs/3,
@@ -270,6 +270,9 @@ set_purge_infos_limit(DbName, Limit, Options) ->
 open_doc(DbName, DocId, Options) ->
     with_db(DbName, Options, {couch_db, open_doc, [DocId, Options]}).
 
+open_revs(DbName, IdRevsOpts, Options) ->
+    with_db(DbName, Options, {couch_db, open_doc_revs, [IdRevsOpts, Options]}).
+
 open_revs(DbName, Id, Revs, Options) ->
     with_db(DbName, Options, {couch_db, open_doc_revs, [Id, Revs, Options]}).