You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by da...@apache.org on 2020/08/05 18:34:06 UTC

[couchdb] 04/05: Use ebtree for reduce functions

This is an automated email from the ASF dual-hosted git repository.

davisp pushed a commit to branch prototype/fdb-layer-ebtree-views
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 18a22286ec283101d2306e08c870fb3c3d8bd86b
Author: Paul J. Davis <pa...@gmail.com>
AuthorDate: Wed Jul 29 10:34:48 2020 -0500

    Use ebtree for reduce functions
---
 src/couch_views/src/couch_views.erl        |   4 -
 src/couch_views/src/couch_views_fdb.erl    | 219 +++++++++++++++++++++--------
 src/couch_views/src/couch_views_reader.erl | 153 +++++++++++++++++++-
 3 files changed, 308 insertions(+), 68 deletions(-)

diff --git a/src/couch_views/src/couch_views.erl b/src/couch_views/src/couch_views.erl
index f6e163a..eea7c89 100644
--- a/src/couch_views/src/couch_views.erl
+++ b/src/couch_views/src/couch_views.erl
@@ -49,10 +49,6 @@ query(Db, DDoc, ViewName, Callback, Acc0, Args0) ->
     Args2 = couch_mrview_util:set_view_type(Args1, ViewName, Views),
     Args3 = couch_mrview_util:validate_args(Args2),
     ok = check_range(Args3),
-    case is_reduce_view(Args3) of
-        true -> throw(not_implemented);
-        false -> ok
-    end,
 
     try
         fabric2_fdb:transactional(Db, fun(TxDb) ->
diff --git a/src/couch_views/src/couch_views_fdb.erl b/src/couch_views/src/couch_views_fdb.erl
index 379bca4..83795b7 100644
--- a/src/couch_views/src/couch_views_fdb.erl
+++ b/src/couch_views/src/couch_views_fdb.erl
@@ -28,6 +28,7 @@
     get_kv_size/2,
 
     fold_map_idx/5,
+    fold_red_idx/6,
 
     write_doc/3,
 
@@ -144,7 +145,7 @@ get_row_count(TxDb, View) ->
     #{
         tx := Tx
     } = TxDb,
-    {Count, _} = ebtree:full_reduce(Tx, View#mrview.btree),
+    {Count, _, _} = ebtree:full_reduce(Tx, View#mrview.btree),
     Count.
 
 
@@ -152,7 +153,7 @@ get_kv_size(TxDb, View) ->
     #{
         tx := Tx
     } = TxDb,
-    {_, TotalSize} = ebtree:full_reduce(Tx, View#mrview.btree),
+    {_, TotalSize, _} = ebtree:full_reduce(Tx, View#mrview.btree),
     TotalSize.
 
 
@@ -223,6 +224,74 @@ fold_map_idx(TxDb, View, Options, Callback, Acc0) ->
     end.
 
 
+fold_red_idx(TxDb, View, Idx, Options, Callback, Acc0) ->
+    #{
+        tx := Tx
+    } = TxDb,
+    #mrview{
+        btree = Btree
+    } = View,
+
+    {Dir, StartKey, EndKey, InclusiveEnd, GroupKeyFun} = to_red_opts(Options),
+
+    Wrapper = fun({GroupKey, Reduction}, WAcc) ->
+        {_RowCount, _RowSize, UserReds} = Reduction,
+        RedValue = lists:nth(Idx, UserReds),
+        Callback(GroupKey, RedValue, WAcc)
+    end,
+
+    case {GroupKeyFun, Dir} of
+        {group_all, fwd} ->
+            EBtreeOpts = [
+                {dir, fwd},
+                {inclusive_end, InclusiveEnd}
+            ],
+            Reduction = ebtree:reduce(Tx, Btree, StartKey, EndKey, EBtreeOpts),
+            Wrapper({null, Reduction}, Acc0);
+        {F, fwd} when is_function(F) ->
+            EBtreeOpts = [
+                {dir, fwd},
+                {inclusive_end, InclusiveEnd}
+            ],
+            ebtree:group_reduce(
+                    Tx,
+                    Btree,
+                    StartKey,
+                    EndKey,
+                    GroupKeyFun,
+                    Wrapper,
+                    Acc0,
+                    EBtreeOpts
+                );
+        {group_all, rev} ->
+            % Start/End keys are swapped on purpose because of how ebtree
+            % handles rev; inclusive_end becomes inclusive_start likewise.
+            EBtreeOpts = [
+                {dir, rev},
+                {inclusive_start, InclusiveEnd}
+            ],
+            Reduction = ebtree:reduce(Tx, Btree, EndKey, StartKey, EBtreeOpts),
+            Wrapper({null, Reduction}, Acc0);
+        {F, rev} when is_function(F) ->
+            % Start/End keys are swapped on purpose because of how ebtree
+            % handles rev; inclusive_end becomes inclusive_start likewise.
+            EBtreeOpts = [
+                {dir, rev},
+                {inclusive_start, InclusiveEnd}
+            ],
+            ebtree:group_reduce(
+                    Tx,
+                    Btree,
+                    EndKey,
+                    StartKey,
+                    GroupKeyFun,
+                    Wrapper,
+                    Acc0,
+                    EBtreeOpts
+                )
+    end.
+
+
 write_doc(TxDb, Mrst, #{deleted := true} = Doc) ->
     #{
         tx := Tx
@@ -353,62 +422,87 @@ open_view_tree(TxDb, Sig, Lang, View) ->
     } = View,
     Prefix = view_tree_prefix(DbPrefix, Sig, ViewId),
     TreeOpts = [
-        {collate_fun, collate_fun(View)},
-        {reduce_fun, make_reduce_fun(View)}
+        {collate_fun, couch_views_util:collate_fun(View)},
+        {reduce_fun, make_reduce_fun(Lang, View)}
     ],
     View#mrview{
         btree = ebtree:open(Tx, Prefix, 10, TreeOpts)
     }.
 
 
-collate_fun(View) ->
-    #mrview{
-        options = Options
-    } = View,
-    case couch_util:get_value(<<"collation">>, Options) of
-        <<"raw">> -> fun erlang:'=<'/2;
-        _ -> fun collate_rows/2
-    end.
+make_reduce_fun(Lang, #mrview{} = View) ->
+    RedFuns = [Src || {_, Src} <- View#mrview.reduce_funs],
+    fun
+        (KVs0, _ReReduce = false) ->
+            KVs1 = detuple_kvs(expand_dupes(KVs0)),
+            TotalSize = lists:foldl(fun([K, V], Acc) ->
+                KSize = couch_ejson_size:encoded_size(K),
+                VSize = couch_ejson_size:encoded_size(V),
+                KSize + VSize + Acc
+            end, 0, KVs1),
+            {ok, UserReds} = couch_query_servers:reduce(Lang, RedFuns, KVs1),
+            {length(KVs1), TotalSize, UserReds};
+        (Reductions, _ReReduce = true) ->
+            FoldFun = fun({Count, Size, UserReds}, {CAcc, SAcc, URedAcc}) ->
+                NewCAcc = Count + CAcc,
+                NewSAcc = Size + SAcc,
+                NewURedAcc = [UserReds | URedAcc],
+                {NewCAcc, NewSAcc, NewURedAcc}
+            end,
+            InitAcc = {0, 0, []},
+            FinalAcc = lists:foldl(FoldFun, InitAcc, Reductions),
+            {FinalCount, FinalSize, UReds} = FinalAcc,
+            {ok, Result} = couch_query_servers:rereduce(Lang, RedFuns, UReds),
+            {FinalCount, FinalSize, Result}
+        end.
+
+
+to_map_opts(Options) ->
+    Dir = case lists:keyfind(dir, 1, Options) of
+        {dir, D} -> D;
+        _ -> fwd
+    end,
 
+    InclusiveEnd = case lists:keyfind(inclusive_end, 1, Options) of
+        {inclusive_end, IE} -> IE;
+        _ -> true
+    end,
 
-collate_rows({KeyA, DocIdA}, {KeyB, DocIdB}) ->
-    case couch_ejson_compare:less(KeyA, KeyB) of
-        -1 -> lt;
-        0 when DocIdA < DocIdB -> lt;
-        0 when DocIdA == DocIdB -> eq;
-        0 -> gt; % when DocIdA > DocIdB
-        1 -> gt
-    end.
+    StartKey = case lists:keyfind(start_key, 1, Options) of
+        {start_key, SK} -> SK;
+        false when Dir == fwd -> ebtree:min();
+        false when Dir == rev -> ebtree:max()
+    end,
 
+    EndKey = case lists:keyfind(end_key, 1, Options) of
+        {end_key, EK} -> EK;
+        false when Dir == fwd -> ebtree:max();
+        false when Dir == rev -> ebtree:min()
+    end,
 
-make_reduce_fun(#mrview{}) ->
-    fun
-        (KVs, _ReReduce = false) ->
-            TotalSize = lists:foldl(fun({K, V}, Acc) ->
-                KSize = couch_ejson_size:encoded_size(K),
-                VSize = case V of
-                    {dups, Dups} ->
-                        lists:foldl(fun(D, DAcc) ->
-                            DAcc + couch_ejson_size:encoded_size(D)
-                        end, 0, Dups);
-                    _ ->
-                        couch_ejson_size:encoded_size(V)
-                end,
-                KSize + VSize + Acc
-            end, 0, KVs),
-            {length(KVs), TotalSize};
-        (KRs, _ReReduce = true) ->
-            lists:foldl(fun({Count, Size}, {CountAcc, SizeAcc}) ->
-                {Count + CountAcc, Size + SizeAcc}
-            end, {0, 0}, KRs)
-    end.
+    {Dir, StartKey, EndKey, InclusiveEnd}.
+
+
+to_red_opts(Options) ->
+    {Dir, StartKey, EndKey, InclusiveEnd} = to_map_opts(Options),
+
+    GroupKeyFun = case lists:keyfind(group_key_fun, 1, Options) of
+        {group_key_fun, GKF} -> GKF;
+        false -> fun({_Key, _DocId}) -> global_group end
+    end,
+
+    {Dir, StartKey, EndKey, InclusiveEnd, GroupKeyFun}.
 
 
 dedupe_rows(View, KVs0) ->
-    CollateFun = collate_fun(View),
-    KVs1 = lists:sort(fun({KeyA, _}, {KeyB, _}) ->
-        CollateFun({KeyA, <<>>}, {KeyB, <<>>})
-    end, lists:sort(KVs0)),
+    CollateFun = couch_views_util:collate_fun(View),
+    KVs1 = lists:sort(fun({KeyA, ValA}, {KeyB, ValB}) ->
+        case CollateFun({KeyA, <<>>}, {KeyB, <<>>}) of
+            lt -> true;
+            eq -> ValA =< ValB;
+            gt -> false
+        end
+    end, KVs0),
     dedupe_rows_int(CollateFun, KVs1).
 
 
@@ -422,22 +516,9 @@ dedupe_rows_int(CollateFun, [{K1, V1} | RestKVs]) ->
     RestDeduped = dedupe_rows_int(CollateFun, RestKVs),
     case RestDeduped of
         [{K2, V2} | RestRestDeduped] ->
-            Equal = case CollateFun({K1, <<>>}, {K2, <<>>}) of
-                true ->
-                    case CollateFun({K2, <<>>}, {K1, <<>>}) of
-                        true ->
-                            true;
-                        false ->
-                            false
-                    end;
-                false ->
-                    false
-            end,
-            case Equal of
-                true ->
-                    [{K1, combine_vals(V1, V2)} | RestRestDeduped];
-                false ->
-                    [{K1, V1} | RestDeduped]
+            case CollateFun({K1, <<>>}, {K2, <<>>}) of
+                eq -> [{K1, combine_vals(V1, V2)} | RestRestDeduped];
+                _ -> [{K1, V1} | RestDeduped]
             end;
         [] ->
             [{K1, V1}]
@@ -450,6 +531,22 @@ combine_vals(V1, V2) ->
     {dups, [V1, V2]}.
 
 
+expand_dupes([]) ->
+    [];
+expand_dupes([{K, {dups, Dups}} | Rest]) ->
+    Expanded = [{K, D} || D <- Dups],
+    Expanded ++ expand_dupes(Rest);
+expand_dupes([{K, V} | Rest]) ->
+    [{K, V} | expand_dupes(Rest)].
+
+
+detuple_kvs([]) ->
+    [];
+detuple_kvs([KV | Rest]) ->
+    {{Key, Id}, Value} = KV,
+    [[[Key, Id], Value] | detuple_kvs(Rest)].
+
+
 id_tree_prefix(DbPrefix, Sig) ->
     Key = {?DB_VIEWS, ?VIEW_TREES, Sig, ?VIEW_ID_TREE},
     erlfdb_tuple:pack(Key, DbPrefix).
diff --git a/src/couch_views/src/couch_views_reader.erl b/src/couch_views/src/couch_views_reader.erl
index 6342f61..c39598a 100644
--- a/src/couch_views/src/couch_views_reader.erl
+++ b/src/couch_views/src/couch_views_reader.erl
@@ -23,7 +23,15 @@
 -include_lib("fabric/include/fabric2.hrl").
 
 
-read(Db, Mrst0, ViewName, UserCallback, UserAcc0, Args) ->
+read(Db, Mrst, ViewName, UserCallback, UserAcc, Args) ->
+    ReadFun = case Args of
+        #mrargs{view_type = map} -> fun read_map_view/6;
+        #mrargs{view_type = red} -> fun read_red_view/6
+    end,
+    ReadFun(Db, Mrst, ViewName, UserCallback, UserAcc, Args).
+
+
+read_map_view(Db, Mrst0, ViewName, UserCallback, UserAcc0, Args) ->
     try
         fabric2_fdb:transactional(Db, fun(TxDb) ->
             #mrst{
@@ -68,6 +76,73 @@ read(Db, Mrst0, ViewName, UserCallback, UserAcc0, Args) ->
     end.
 
 
+read_red_view(Db, Mrst0, ViewName, UserCallback, UserAcc0, Args) ->
+    try
+        fabric2_fdb:transactional(Db, fun(TxDb) ->
+            #mrst{
+                language = Lang,
+                views = Views
+            } = Mrst = couch_views_fdb:set_trees(TxDb, Mrst0),
+
+            #mrargs{
+                extra = Extra
+            } = Args,
+
+            {Idx, Lang, View} = get_red_view(Lang, Args, ViewName, Views),
+            Fun = fun handle_red_row/3,
+
+            Meta = get_red_meta(TxDb, Mrst, View, Args),
+            UserAcc1 = maybe_stop(UserCallback(Meta, UserAcc0)),
+
+            Finalizer = case couch_util:get_value(finalizer, Extra) of
+                undefined ->
+                    {_, FunSrc} = lists:nth(Idx, View#mrview.reduce_funs),
+                    FunSrc;
+                CustomFun->
+                    CustomFun
+            end,
+
+            Acc0 = #{
+                db => TxDb,
+                skip => Args#mrargs.skip,
+                limit => Args#mrargs.limit,
+                mrargs => undefined,
+                finalizer => Finalizer,
+                red_idx => Idx,
+                language => Lang,
+                callback => UserCallback,
+                acc => UserAcc1
+            },
+
+            Acc1 = lists:foldl(fun(KeyArgs, KeyAcc0) ->
+                Opts = mrargs_to_fdb_options(KeyArgs),
+                KeyAcc1 = KeyAcc0#{
+                    mrargs := KeyArgs
+                },
+                couch_views_fdb:fold_red_idx(
+                        TxDb,
+                        View,
+                        Idx,
+                        Opts,
+                        Fun,
+                        KeyAcc1
+                    )
+            end, Acc0, expand_keys_args(Args)),
+
+            #{
+                acc := UserAcc2
+            } = Acc1,
+            {ok, maybe_stop(UserCallback(complete, UserAcc2))}
+        end)
+    catch
+        throw:{complete, Out} ->
+            {_, Final} = UserCallback(complete, Out),
+            {ok, Final};
+        throw:{done, Out} ->
+            {ok, Out}
+    end.
+
+
 get_map_meta(TxDb, Mrst, View, #mrargs{update_seq = true}) ->
     TotalRows = couch_views_fdb:get_row_count(TxDb, View),
     ViewSeq = couch_views_fdb:get_update_seq(TxDb, Mrst),
@@ -78,6 +153,14 @@ get_map_meta(TxDb, _Mrst, View, #mrargs{}) ->
     {meta, [{total, TotalRows}, {offset, null}]}.
 
 
+get_red_meta(TxDb, Mrst, _View, #mrargs{update_seq = true}) ->
+    ViewSeq = couch_views_fdb:get_update_seq(TxDb, Mrst),
+    {meta,  [{update_seq, ViewSeq}]};
+
+get_red_meta(_TxDb, _Mrst, _View, #mrargs{}) ->
+    {meta, []}.
+
+
 handle_map_row(_DocId, _Key, _Value, #{skip := Skip} = Acc) when Skip > 0 ->
     Acc#{skip := Skip - 1};
 
@@ -115,6 +198,38 @@ handle_map_row(DocId, Key, Value, Acc) ->
     Acc#{limit := Limit - 1, acc := UserAcc1}.
 
 
+handle_red_row(_Key, _Red, #{skip := Skip} = Acc) when Skip > 0 ->
+    Acc#{skip := Skip - 1};
+
+handle_red_row(_Key, _Value, #{limit := 0, acc := UserAcc}) ->
+    throw({complete, UserAcc});
+
+handle_red_row(Key0, Value0, Acc) ->
+    #{
+        limit := Limit,
+        finalizer := Finalizer,
+        callback := UserCallback,
+        acc := UserAcc0
+    } = Acc,
+
+    Key1 = case Key0 of
+        undefined -> null;
+        _ -> Key0
+    end,
+    Value1 = maybe_finalize(Finalizer, Value0),
+    Row = [{key, Key1}, {value, Value1}],
+
+    UserAcc1 = maybe_stop(UserCallback({row, Row}, UserAcc0)),
+    Acc#{limit := Limit - 1, acc := UserAcc1}.
+
+
+maybe_finalize(null, Red) ->
+    Red;
+maybe_finalize(Finalizer, Red) ->
+    {ok, Finalized} = couch_query_servers:finalize(Finalizer, Red),
+    Finalized.
+
+
 get_map_view(Lang, Args, ViewName, Views) ->
     case couch_mrview_util:extract_view(Lang, Args, ViewName, Views) of
         {map, View, _Args} -> View;
@@ -122,6 +237,13 @@ get_map_view(Lang, Args, ViewName, Views) ->
     end.
 
 
+get_red_view(Lang, Args, ViewName, Views) ->
+    case couch_mrview_util:extract_view(Lang, Args, ViewName, Views) of
+        {red, {Idx, Lang, View}, _} -> {Idx, Lang, View};
+        _ -> throw({not_found, missing_named_view})
+    end.
+
+
 expand_keys_args(#mrargs{keys = undefined} = Args) ->
     [Args];
 
@@ -136,12 +258,14 @@ expand_keys_args(#mrargs{keys = Keys} = Args) ->
 
 mrargs_to_fdb_options(Args) ->
     #mrargs{
+        view_type = ViewType,
         start_key = StartKey,
         start_key_docid = StartKeyDocId,
         end_key = EndKey,
         end_key_docid = EndKeyDocId0,
         direction = Direction,
-        inclusive_end = InclusiveEnd
+        inclusive_end = InclusiveEnd,
+        group_level = GroupLevel
     } = Args,
 
     StartKeyOpts = if StartKey == undefined -> []; true ->
@@ -160,10 +284,33 @@ mrargs_to_fdb_options(Args) ->
         [{end_key, {EndKey, EndKeyDocId}}]
     end,
 
+    GroupFunOpt = make_group_key_fun(ViewType, GroupLevel),
+
     [
         {dir, Direction},
         {inclusive_end, InclusiveEnd}
-    ] ++ StartKeyOpts ++ EndKeyOpts.
+    ] ++ StartKeyOpts ++ EndKeyOpts ++ GroupFunOpt.
+
+
+make_group_key_fun(map, _) ->
+    [];
+
+make_group_key_fun(red, exact) ->
+    [
+        {group_key_fun, fun({Key, _DocId}) -> Key end}
+    ];
+
+make_group_key_fun(red, 0) ->
+    [
+        {group_key_fun, group_all}
+    ];
+
+make_group_key_fun(red, N) when is_integer(N), N > 0 ->
+    GKFun = fun
+        ({Key, _DocId}) when is_list(Key) -> lists:sublist(Key, N);
+        ({Key, _DocId}) -> Key
+    end,
+    [{group_key_fun, GKFun}].
 
 
 maybe_stop({ok, Acc}) -> Acc;