You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by va...@apache.org on 2022/10/10 04:56:23 UTC

[couchdb] branch optimized-fabric-open-revs updated (6dcc91157 -> fbd8af67b)

This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a change to branch optimized-fabric-open-revs
in repository https://gitbox.apache.org/repos/asf/couchdb.git


 discard 6dcc91157 wip optimized bulk-get
     new fbd8af67b wip optimized bulk-get

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (6dcc91157)
            \
             N -- N -- N   refs/heads/optimized-fabric-open-revs (fbd8af67b)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 src/fabric/src/fabric_open_revs.erl | 118 ++++++++++++++++--------------------
 1 file changed, 53 insertions(+), 65 deletions(-)


[couchdb] 01/01: wip optimized bulk-get

Posted by va...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch optimized-fabric-open-revs
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit fbd8af67bc6a42b94795839ce02a7070e16ccc04
Author: Nick Vatamaniuc <va...@gmail.com>
AuthorDate: Sat Oct 8 02:44:27 2022 -0400

    wip optimized bulk-get
---
 src/fabric/src/fabric.erl           |  17 ++-
 src/fabric/src/fabric_open_revs.erl | 212 ++++++++++++++++++++++++++++++++++++
 2 files changed, 228 insertions(+), 1 deletion(-)

diff --git a/src/fabric/src/fabric.erl b/src/fabric/src/fabric.erl
index 6d779d584..e61521da0 100644
--- a/src/fabric/src/fabric.erl
+++ b/src/fabric/src/fabric.erl
@@ -38,7 +38,7 @@
 % Documents
 -export([
     open_doc/3,
-    open_revs/4,
+    open_revs/3, open_revs/4,
     get_doc_info/3,
     get_full_doc_info/3,
     get_missing_revs/2, get_missing_revs/3,
@@ -272,6 +272,21 @@ open_doc(DbName, Id, Options) ->
             {error, {invalid_option, {doc_info, Else}}}
     end.
 
+%% @doc retrieve a collection of revisions for a batch of docs
+-spec open_revs(dbname(), [{{docid(), [revision()] | all}, option()}], [option()]) ->
+    {ok, [[{ok, #doc{}} | {{not_found, missing}, revision()}]]}
+    | {timeout, any()}
+    | {error, any()}
+    | {error, any(), any()}.
+open_revs(DbName, IdRevsOpts0, Options) ->
+    IdRevsOpts = lists:map(
+        fun({{Id, Revs}, DocOpts}) ->
+            {{docid(Id), Revs}, DocOpts}
+        end,
+        IdRevsOpts0
+    ),
+    fabric_open_revs:go(dbname(DbName), IdRevsOpts, opts(Options)).
+
 %% @doc retrieve a collection of revisions, possible all
 -spec open_revs(dbname(), docid(), [revision()] | all, [option()]) ->
     {ok, [{ok, #doc{}} | {{not_found, missing}, revision()}]}
diff --git a/src/fabric/src/fabric_open_revs.erl b/src/fabric/src/fabric_open_revs.erl
new file mode 100644
index 000000000..df461cb86
--- /dev/null
+++ b/src/fabric/src/fabric_open_revs.erl
@@ -0,0 +1,212 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(fabric_open_revs).
+
+-export([
+    go/3
+]).
+
+-include_lib("mem3/include/mem3.hrl").
+-include_lib("couch/include/couch_db.hrl").
+
+-record(req, {
+    idrevs,
+    wcnt = 0,
+    rcnt = 0,
+    responses = []
+}).
+
+-record(st, {
+    r,
+    args,
+    reqs,
+    workers
+}).
+
+go(DbName, IdsRevsOpts, Options) ->
+    DefaultR = integer_to_list(mem3:quorum(DbName)),
+    R = list_to_integer(couch_util:get_value(r, Options, DefaultR)),
+    {ArgRefs, Reqs0} = build_req_map(IdsRevsOpts),
+    ShardMap = build_shard_map(DbName, Reqs0),
+    {Workers, Reqs} = spawn_workers(Reqs0, ShardMap, Options),
+    WShards = maps:keys(Workers),
+    RexiMon = fabric_util:create_monitors(WShards),
+    St = #st{
+        r = R,
+        args = ArgRefs,
+        reqs = Reqs,
+        workers = Workers
+    },
+    try fabric_util:recv(WShards, #shard.ref, fun handle_message/3, St) of
+        {timeout, #st{workers = #{} = Workers1}} ->
+            WShards1 = maps:keys(Workers1),
+            stop_workers(Workers1),
+            % Transform to a fabric_dict to reuse log_timeout/2
+            DefunctWorkers = fabric_dict:init(WShards1, nil),
+            fabric_util:log_timeout(DefunctWorkers, "open_revs"),
+            {error, timeout};
+        Else ->
+            Else
+    after
+        rexi_monitor:stop(RexiMon)
+    end.
+
+handle_message([_ | _] = Resps, Worker, #st{} = St) ->
+    #st{workers = Workers, reqs = Reqs, r = R} = St,
+    {ArgsRefs, Workers1} = maps:take(Worker, Workers),
+    ArgsResps = lists:zip(ArgsRefs, Resps),
+    Reqs1 = lists:foldl(fun responses_fold/2, Reqs, ArgsResps),
+    RNotMet = not r_met(Reqs1, R),
+    HaveViableWorkers = map_size(Workers1) > 0,
+    case RNotMet andalso HaveViableWorkers of
+        true ->
+            {ok, St#st{workers = Workers1, reqs = Reqs1}};
+        false ->
+            stop_workers(Workers1),
+            {stop, finalize(St#st.args, Reqs1)}
+    end;
+handle_message({rexi_DOWN, _, {_, NodeRef}, _}, _Worker, #st{} = St) ->
+    #st{workers = Workers, reqs = Reqs} = St,
+    FilterFun = fun(#shard{node = N}) -> N =:= NodeRef end,
+    DeadKeys = lists:filter(FilterFun, maps:keys(Workers)),
+    Workers1 = maps:without(DeadKeys, Workers),
+    DeadWorkers = maps:with(DeadKeys, Workers),
+    FoldFun = fun(_, ArgRefs, Acc) -> update_wcnt(Acc, ArgRefs, -1) end,
+    Reqs1 = maps:fold(FoldFun, Reqs, DeadWorkers),
+    Error = {error, {nodedown, <<"progress not possible">>}},
+    handle_error(Error, St#st{workers = Workers1, reqs = Reqs1});
+handle_message({rexi_EXIT, Reason}, Worker, #st{} = St) ->
+    handle_message(Reason, Worker, St);
+handle_message({error, Reason}, Worker, #st{} = St) ->
+    handle_message(Reason, Worker, St);
+handle_message(Reason, Worker, #st{} = St) ->
+    #st{workers = Workers, reqs = Reqs} = St,
+    {DeadArgRefs, Workers1} = maps:take(Worker, Workers),
+    Reqs1 = update_wcnt(Reqs, DeadArgRefs, -1),
+    handle_error(Reason, St#st{workers = Workers1, reqs = Reqs1}).
+
+responses_fold({ArgRef, Resp}, #{} = Reqs) ->
+    #{ArgRef := Req} = Reqs,
+    #req{rcnt = R, wcnt = W, responses = Resps} = Req,
+    Reqs#{
+        ArgRef => Req#req{
+            rcnt = R + 1,
+            wcnt = W - 1,
+            responses = lists:umerge(Resps, lists:usort(Resp))
+        }
+    }.
+
+handle_error(Error, #st{workers = Workers, reqs = Reqs} = St) ->
+    case progress_possible(Reqs) of
+        true ->
+            {ok, St};
+        false ->
+            stop_workers(Workers),
+            {error, Error}
+    end.
+
+% Build a map of #{ref => {{Id, Revs}, DocOpts}} arg tuples. Return
+% both the map and references in original argument order. DocOpts can
+% hold huge lists in {atts_since, ...} so it makes sense to operate
+% with a bit more compact tags(references).
+%
+build_req_map(IdsRevsOpts) ->
+    Fun = fun(IdRevsOpts, Acc) ->
+        ArgRef = make_ref(),
+        {ArgRef, Acc#{ArgRef => #req{idrevs = IdRevsOpts}}}
+    end,
+    lists:mapfoldr(Fun, #{}, IdsRevsOpts).
+
+% Build a #{#shard{} => [ArgRef1, ArgRef2, ...]} map. This structure will be
+% used for launching workers and then matching up response with the original
+% args.
+%
+build_shard_map(DbName, #{} = Reqs) ->
+    FoldReqsFun = fun(ArgRef, #req{idrevs = IdRevs}, WAcc) ->
+        {{DocId, _}, _} = IdRevs,
+        FoldShardsFun = fun(Shard, WAccInner) ->
+            UpdateFun = fun(ArgRefs) -> [ArgRef | ArgRefs] end,
+            maps:update_with(Shard, UpdateFun, [ArgRef], WAccInner)
+        end,
+        lists:foldl(FoldShardsFun, WAcc, mem3:shards(DbName, DocId))
+    end,
+    maps:fold(FoldReqsFun, #{}, Reqs).
+
+spawn_workers(#{} = Reqs, #{} = ShardMap, Options) ->
+    Fun = fun(Shard, ArgRefs, {WAcc, ReqsAcc}) ->
+        Worker = rexi_cast(Shard, ArgRefs, ReqsAcc, Options),
+        WAcc1 = WAcc#{Worker => ArgRefs},
+        ReqsAcc1 = update_wcnt(ReqsAcc, ArgRefs, 1),
+        {WAcc1, ReqsAcc1}
+    end,
+    maps:fold(Fun, {#{}, Reqs}, ShardMap).
+
+% Spawn a worker and return an updated #shard{} with worker ref
+% Args are fetched from the Reqs map based on the ArgRef tag
+%
+rexi_cast(#shard{} = Shard, ArgRefs, #{} = Reqs, Options) ->
+    Fun = fun(ArgRef) when is_reference(ArgRef) ->
+        #{ArgRef := #req{idrevs = IdRevs}} = Reqs,
+        IdRevs
+    end,
+    Args = lists:map(Fun, ArgRefs),
+    RexiArgs = {fabric_rpc, open_revs, [Shard#shard.name, Args, Options]},
+    WRef = rexi:cast(Shard#shard.node, RexiArgs),
+    Shard#shard{ref = WRef}.
+
+% Update worker count for each of the #req{} records. Value may be positive
+% or negative, which could be used to decrement worker counts
+%
+update_wcnt(#{} = Reqs, ArgRefs, Val) when is_integer(Val) ->
+    Fun = fun(Ref, Acc) ->
+        #{Ref := #req{wcnt = W} = Req} = Acc,
+        Acc#{Ref => Req#req{wcnt = W + Val}}
+    end,
+    lists:foldl(Fun, Reqs, ArgRefs).
+
+progress_possible(#{} = Reqs) ->
+    Fun = fun(_, #req{wcnt = W, rcnt = R}, Acc) -> Acc andalso W + R > 0 end,
+    maps:fold(Fun, true, Reqs).
+
+r_met(#{} = Reqs, ExpectedR) ->
+    Fun = fun(_, #req{rcnt = R}, Acc) -> min(R, Acc) end,
+    maps:fold(Fun, ExpectedR, Reqs) >= ExpectedR.
+
+finalize(ArgRefs, #{} = Reqs) ->
+    Fun = fun(Ref) ->
+        #{Ref := #req{responses = Resps}} = Reqs,
+        finalize_req(Resps)
+    end,
+    lists:map(Fun, ArgRefs).
+
+finalize_req(DocRevs) ->
+    Paths = lists:map(fun rev_to_path/1, DocRevs),
+    RevTree = couch_key_tree:multi_merge([], Paths),
+    TreeLeafs = couch_key_tree:get_all_leafs(RevTree),
+    % latest / all can return [] but exact ones get not_found,missing?
+    lists:map(fun path_to_reply/1, TreeLeafs).
+
+path_to_reply({?REV_MISSING, {Pos, [Rev]}}) ->
+    {{not_found, missing}, {Pos, Rev}};
+path_to_reply({#doc{} = Doc, _}) ->
+    {ok, Doc}.
+
+rev_to_path({ok, #doc{} = Doc}) ->
+    couch_doc:to_path(Doc);
+rev_to_path({{not_found, missing}, {Pos, Rev}}) ->
+    {Pos, {Rev, ?REV_MISSING, []}}.
+
+stop_workers(#{} = Workers) ->
+    Fun = fun(#shard{node = Node, ref = Ref}) -> {Node, Ref} end,
+    NodeRefs = lists:map(Fun, maps:keys(Workers)),
+    rexi:kill_all(NodeRefs).