You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by da...@apache.org on 2020/09/16 17:06:36 UTC
[couchdb] 04/12: Use ebtree for reduce functions
This is an automated email from the ASF dual-hosted git repository.
davisp pushed a commit to branch feature-ebtree-views
in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit ea0fb193eeac69d4b91c4b503a4601903b969c98
Author: Paul J. Davis <pa...@gmail.com>
AuthorDate: Wed Jul 29 10:34:48 2020 -0500
Use ebtree for reduce functions
---
src/couch_views/src/couch_views.erl | 6 --
src/couch_views/src/couch_views_fdb.erl | 142 ++++++++++++++++++++++----
src/couch_views/src/couch_views_reader.erl | 153 ++++++++++++++++++++++++++++-
3 files changed, 271 insertions(+), 30 deletions(-)
diff --git a/src/couch_views/src/couch_views.erl b/src/couch_views/src/couch_views.erl
index 525866e..8a05302 100644
--- a/src/couch_views/src/couch_views.erl
+++ b/src/couch_views/src/couch_views.erl
@@ -161,12 +161,6 @@ maybe_update_view(TxDb, Mrst, false, _Args) ->
end.
-is_reduce_view(#mrargs{view_type = ViewType}) ->
- ViewType =:= red;
-is_reduce_view({Reduce, _, _}) ->
- Reduce =:= red.
-
-
to_mrargs(#mrargs{} = Args) ->
Args;
diff --git a/src/couch_views/src/couch_views_fdb.erl b/src/couch_views/src/couch_views_fdb.erl
index cd78f64..a66f138 100644
--- a/src/couch_views/src/couch_views_fdb.erl
+++ b/src/couch_views/src/couch_views_fdb.erl
@@ -28,6 +28,7 @@
get_kv_size/2,
fold_map_idx/5,
+ fold_red_idx/6,
write_doc/3,
@@ -144,7 +145,7 @@ get_row_count(TxDb, View) ->
#{
tx := Tx
} = TxDb,
- {Count, _} = ebtree:full_reduce(Tx, View#mrview.btree),
+ {Count, _, _} = ebtree:full_reduce(Tx, View#mrview.btree),
Count.
@@ -152,7 +153,7 @@ get_kv_size(TxDb, View) ->
#{
tx := Tx
} = TxDb,
- {_, TotalSize} = ebtree:full_reduce(Tx, View#mrview.btree),
+ {_, TotalSize, _} = ebtree:full_reduce(Tx, View#mrview.btree),
TotalSize.
@@ -216,6 +217,74 @@ fold_map_idx(TxDb, View, Options, Callback, Acc0) ->
end.
+% Fold over the reduce values stored in a view's btree.
+%
+% Options are decoded by to_red_opts/1 into a direction, start and end
+% keys, the inclusive_end flag and a group key fun. When the group key fun
+% is the atom group_all a single ebtree:reduce/5 result is handed to
+% Callback with a null group key. When it is a fun, it is passed on to
+% ebtree:group_reduce/8. Callback is invoked as
+% Callback(GroupKey, RedValue, Acc) where RedValue is the Idx-th element
+% of the user reductions held in the third slot of the reduction tuple.
+fold_red_idx(TxDb, View, Idx, Options, Callback, Acc0) ->
+    #{tx := Tx} = TxDb,
+    #mrview{btree = Btree} = View,
+
+    {Dir, StartKey, EndKey, InclusiveEnd, GroupKeyFun} = to_red_opts(Options),
+
+    % Pick the requested reduce function's value out of the stored
+    % reduction before handing the row to the caller.
+    Wrapper = fun({GroupKey, Reduction}, WAcc) ->
+        {_, _, UserReds} = Reduction,
+        Callback(GroupKey, lists:nth(Idx, UserReds), WAcc)
+    end,
+
+    FwdOpts = [{dir, fwd}, {inclusive_end, InclusiveEnd}],
+    % Reverse folds hand ebtree the keys swapped on purpose, and the
+    % inclusive flag is passed as inclusive_start for the same reason.
+    RevOpts = [{dir, rev}, {inclusive_start, InclusiveEnd}],
+
+    ReduceAll = fun(KeyA, KeyB, EBtreeOpts) ->
+        Total = ebtree:reduce(Tx, Btree, KeyA, KeyB, EBtreeOpts),
+        Wrapper({null, Total}, Acc0)
+    end,
+    ReduceGroups = fun(KeyA, KeyB, EBtreeOpts) ->
+        ebtree:group_reduce(
+            Tx,
+            Btree,
+            KeyA,
+            KeyB,
+            GroupKeyFun,
+            Wrapper,
+            Acc0,
+            EBtreeOpts
+        )
+    end,
+
+    case {GroupKeyFun, Dir} of
+        {group_all, fwd} ->
+            ReduceAll(StartKey, EndKey, FwdOpts);
+        {F, fwd} when is_function(F) ->
+            ReduceGroups(StartKey, EndKey, FwdOpts);
+        {group_all, rev} ->
+            ReduceAll(EndKey, StartKey, RevOpts);
+        {F, rev} when is_function(F) ->
+            ReduceGroups(EndKey, StartKey, RevOpts)
+    end.
+
+
write_doc(TxDb, Mrst, #{deleted := true} = Doc) ->
#{
tx := Tx
@@ -339,7 +408,7 @@ open_id_tree(TxDb, Sig) ->
ebtree:open(Tx, Prefix, get_order(id_btree), TreeOpts).
-open_view_tree(TxDb, Sig, _Lang, View) ->
+open_view_tree(TxDb, Sig, Lang, View) ->
#{
tx := Tx,
db_prefix := DbPrefix
@@ -350,7 +419,7 @@ open_view_tree(TxDb, Sig, _Lang, View) ->
Prefix = view_tree_prefix(DbPrefix, Sig, ViewId),
TreeOpts = [
{collate_fun, couch_views_util:collate_fun(View)},
- {reduce_fun, make_reduce_fun(View)}
+ {reduce_fun, make_reduce_fun(Lang, View)},
{persist_fun, fun persist_chunks/3}
],
View#mrview{
@@ -372,26 +441,30 @@ min_order(V) ->
V + 1.
-make_reduce_fun(#mrview{}) ->
+make_reduce_fun(Lang, #mrview{} = View) ->
+ RedFuns = [Src || {_, Src} <- View#mrview.reduce_funs],
fun
- (KVs, _ReReduce = false) ->
- TotalSize = lists:foldl(fun({K, V}, Acc) ->
+ (KVs0, _ReReduce = false) ->
+ KVs1 = detuple_kvs(expand_dupes(KVs0)),
+ TotalSize = lists:foldl(fun([K, V], Acc) ->
KSize = couch_ejson_size:encoded_size(K),
- VSize = case V of
- {dups, Dups} ->
- lists:foldl(fun(D, DAcc) ->
- DAcc + couch_ejson_size:encoded_size(D)
- end, 0, Dups);
- _ ->
- couch_ejson_size:encoded_size(V)
- end,
+ VSize = couch_ejson_size:encoded_size(V),
KSize + VSize + Acc
- end, 0, KVs),
- {length(KVs), TotalSize};
- (KRs, _ReReduce = true) ->
- lists:foldl(fun({Count, Size}, {CountAcc, SizeAcc}) ->
- {Count + CountAcc, Size + SizeAcc}
- end, {0, 0}, KRs)
+ end, 0, KVs1),
+ {ok, UserReds} = couch_query_servers:reduce(Lang, RedFuns, KVs1),
+ {length(KVs1), TotalSize, UserReds};
+ (Reductions, _ReReduce = true) ->
+ FoldFun = fun({Count, Size, UserReds}, {CAcc, SAcc, URedAcc}) ->
+ NewCAcc = Count + CAcc,
+ NewSAcc = Size + SAcc,
+ NewURedAcc = [UserReds | URedAcc],
+ {NewCAcc, NewSAcc, NewURedAcc}
+ end,
+ InitAcc = {0, 0, []},
+ FinalAcc = lists:foldl(FoldFun, InitAcc, Reductions),
+ {FinalCount, FinalSize, UReds} = FinalAcc,
+ {ok, Result} = couch_query_servers:rereduce(Lang, RedFuns, UReds),
+ {FinalCount, FinalSize, Result}
end.
@@ -438,6 +511,17 @@ to_map_opts(Options) ->
{Dir, StartKey, EndKey, InclusiveEnd}.
+% Decode fold options for a reduce fold. Returns the same fields as
+% to_map_opts/1 plus the value of the group_key_fun option. When that
+% option is absent, a fun mapping every {Key, DocId} pair to the atom
+% global_group is used instead.
+to_red_opts(Options) ->
+    DefaultGroupKeyFun = fun({_Key, _DocId}) -> global_group end,
+    {Dir, StartKey, EndKey, InclusiveEnd} = to_map_opts(Options),
+
+    GroupKeyFun = case lists:keyfind(group_key_fun, 1, Options) of
+        false -> DefaultGroupKeyFun;
+        {group_key_fun, Configured} -> Configured
+    end,
+
+    {Dir, StartKey, EndKey, InclusiveEnd, GroupKeyFun}.
+
+
dedupe_rows(View, KVs0) ->
CollateFun = couch_views_util:collate_fun(View),
KVs1 = lists:sort(fun({KeyA, ValA}, {KeyB, ValB}) ->
@@ -475,6 +559,22 @@ combine_vals(V1, V2) ->
{dups, [V1, V2]}.
+% Replace every {K, {dups, [V1, V2, ...]}} entry with one {K, V} pair per
+% duplicate value. All other {K, V} pairs are kept unchanged and the
+% relative order of the rows is preserved.
+expand_dupes(KVs) ->
+    lists:flatmap(fun
+        ({K, {dups, Dups}}) -> [{K, D} || D <- Dups];
+        ({K, V}) -> [{K, V}]
+    end, KVs).
+
+
+% Convert each {{Key, Id}, Value} row into the nested list form
+% [[Key, Id], Value]. A row of any other shape fails with badmatch.
+detuple_kvs(KVs) ->
+    lists:map(fun(KV) ->
+        {{Key, Id}, Value} = KV,
+        [[Key, Id], Value]
+    end, KVs).
+
+
id_tree_prefix(DbPrefix, Sig) ->
Key = {?DB_VIEWS, ?VIEW_TREES, Sig, ?VIEW_ID_TREE},
erlfdb_tuple:pack(Key, DbPrefix).
diff --git a/src/couch_views/src/couch_views_reader.erl b/src/couch_views/src/couch_views_reader.erl
index eaa310e..ecadb18 100644
--- a/src/couch_views/src/couch_views_reader.erl
+++ b/src/couch_views/src/couch_views_reader.erl
@@ -23,7 +23,15 @@
-include_lib("fabric/include/fabric2.hrl").
-read(Db, Mrst0, ViewName, UserCallback, UserAcc0, Args) ->
+% Entry point for reading a view. Dispatches on the view_type field of
+% Args: map reads go to read_map_view/6, red reads to read_red_view/6.
+read(Db, Mrst, ViewName, UserCallback, UserAcc, Args) ->
+    case Args of
+        #mrargs{view_type = map} ->
+            read_map_view(Db, Mrst, ViewName, UserCallback, UserAcc, Args);
+        #mrargs{view_type = red} ->
+            read_red_view(Db, Mrst, ViewName, UserCallback, UserAcc, Args)
+    end.
+
+
+read_map_view(Db, Mrst0, ViewName, UserCallback, UserAcc0, Args) ->
try
fabric2_fdb:transactional(Db, fun(TxDb) ->
#mrst{
@@ -68,6 +76,73 @@ read(Db, Mrst0, ViewName, UserCallback, UserAcc0, Args) ->
end.
+% Read rows from a reduce view inside a single transaction.
+%
+% The user callback first receives the meta row, then one {row, ...}
+% message per reduce row (via handle_red_row/3) and finally the atom
+% complete. Values are finalized with the `finalizer` entry of
+% Args#mrargs.extra when present, otherwise with the source of the
+% selected reduce function.
+%
+% A thrown {complete, Acc} still delivers `complete` to the user
+% callback; a thrown {done, Acc} returns Acc as is.
+read_red_view(Db, Mrst0, ViewName, UserCallback, UserAcc0, Args) ->
+    ReadTx = fun(TxDb) ->
+        Mrst = couch_views_fdb:set_trees(TxDb, Mrst0),
+        #mrst{language = Lang, views = Views} = Mrst,
+        #mrargs{extra = Extra} = Args,
+
+        {Idx, Lang, View} = get_red_view(Lang, Args, ViewName, Views),
+
+        Meta = get_red_meta(TxDb, Mrst, View, Args),
+        UserAcc1 = maybe_stop(UserCallback(Meta, UserAcc0)),
+
+        Finalizer = case couch_util:get_value(finalizer, Extra) of
+            undefined ->
+                {_, FunSrc} = lists:nth(Idx, View#mrview.reduce_funs),
+                FunSrc;
+            CustomFun ->
+                CustomFun
+        end,
+
+        BaseAcc = #{
+            db => TxDb,
+            skip => Args#mrargs.skip,
+            limit => Args#mrargs.limit,
+            mrargs => undefined,
+            finalizer => Finalizer,
+            red_idx => Idx,
+            language => Lang,
+            callback => UserCallback,
+            acc => UserAcc1
+        },
+
+        % One fold per expanded key argument set, threading the same
+        % accumulator through all of them.
+        FoldKeyArgs = fun(KeyArgs, KeyAcc0) ->
+            Opts = mrargs_to_fdb_options(KeyArgs),
+            KeyAcc1 = KeyAcc0#{mrargs := KeyArgs},
+            RowFun = fun handle_red_row/3,
+            couch_views_fdb:fold_red_idx(TxDb, View, Idx, Opts, RowFun, KeyAcc1)
+        end,
+        FinalAcc = lists:foldl(FoldKeyArgs, BaseAcc, expand_keys_args(Args)),
+
+        #{acc := UserAcc2} = FinalAcc,
+        {ok, maybe_stop(UserCallback(complete, UserAcc2))}
+    end,
+    try
+        fabric2_fdb:transactional(Db, ReadTx)
+    catch
+        throw:{complete, Out} ->
+            {_, Final} = UserCallback(complete, Out),
+            {ok, Final};
+        throw:{done, Out} ->
+            {ok, Out}
+    end.
+
+
get_map_meta(TxDb, Mrst, View, #mrargs{update_seq = true}) ->
TotalRows = couch_views_fdb:get_row_count(TxDb, View),
ViewSeq = couch_views_fdb:get_update_seq(TxDb, Mrst),
@@ -78,6 +153,14 @@ get_map_meta(TxDb, _Mrst, View, #mrargs{}) ->
{meta, [{total, TotalRows}, {offset, null}]}.
+% Build the meta message for a reduce view read. The property list only
+% carries update_seq, and only when Args has update_seq = true.
+get_red_meta(TxDb, Mrst, _View, #mrargs{update_seq = UpdateSeq}) ->
+    Props = case UpdateSeq of
+        true -> [{update_seq, couch_views_fdb:get_update_seq(TxDb, Mrst)}];
+        _ -> []
+    end,
+    {meta, Props}.
+
+
handle_map_row(_DocId, _Key, _Value, #{skip := Skip} = Acc) when Skip > 0 ->
Acc#{skip := Skip - 1};
@@ -115,6 +198,38 @@ handle_map_row(DocId, Key, Value, Acc) ->
Acc#{limit := Limit - 1, acc := UserAcc1}.
+% Per-row callback used while folding reduce rows.
+%
+% Rows are dropped while skip is positive. Once limit reaches 0 the fold
+% is aborted by throwing {complete, UserAcc}. Otherwise the value is
+% finalized, the row is passed to the user callback and limit is
+% decremented. An undefined key is reported as null.
+handle_red_row(_Key, _Red, Acc = #{skip := ToSkip}) when ToSkip > 0 ->
+    Acc#{skip := ToSkip - 1};
+
+handle_red_row(_Key, _Red, #{limit := 0, acc := UserAcc}) ->
+    throw({complete, UserAcc});
+
+handle_red_row(Key, Red, Acc) ->
+    #{
+        limit := Limit,
+        finalizer := Finalizer,
+        callback := UserCallback,
+        acc := UserAcc0
+    } = Acc,
+
+    RowKey = if
+        Key =:= undefined -> null;
+        true -> Key
+    end,
+    Row = [{key, RowKey}, {value, maybe_finalize(Finalizer, Red)}],
+
+    UserAcc1 = maybe_stop(UserCallback({row, Row}, UserAcc0)),
+    Acc#{limit := Limit - 1, acc := UserAcc1}.
+
+
+% Apply Finalizer to a reduce value. A null finalizer returns the value
+% untouched; anything else goes through couch_query_servers:finalize/2,
+% which must answer {ok, Finalized}.
+maybe_finalize(Finalizer, Red) ->
+    case Finalizer of
+        null ->
+            Red;
+        _ ->
+            {ok, Finalized} = couch_query_servers:finalize(Finalizer, Red),
+            Finalized
+    end.
+
+
get_map_view(Lang, Args, ViewName, Views) ->
case couch_mrview_util:extract_view(Lang, Args, ViewName, Views) of
{map, View, _Args} -> View;
@@ -122,6 +237,13 @@ get_map_view(Lang, Args, ViewName, Views) ->
end.
+% Look up the reduce view named ViewName. Returns {Idx, Lang, View} when
+% couch_mrview_util:extract_view/4 yields a red view for the same Lang;
+% any other result throws {not_found, missing_named_view}.
+get_red_view(Lang, Args, ViewName, Views) ->
+    Extracted = couch_mrview_util:extract_view(Lang, Args, ViewName, Views),
+    case Extracted of
+        {red, {_Idx, Lang, _View} = RedView, _} ->
+            RedView;
+        _ ->
+            throw({not_found, missing_named_view})
+    end.
+
+
expand_keys_args(#mrargs{keys = undefined} = Args) ->
[Args];
@@ -136,12 +258,14 @@ expand_keys_args(#mrargs{keys = Keys} = Args) ->
mrargs_to_fdb_options(Args) ->
#mrargs{
+ view_type = ViewType,
start_key = StartKey,
start_key_docid = StartKeyDocId,
end_key = EndKey,
end_key_docid = EndKeyDocId0,
direction = Direction,
- inclusive_end = InclusiveEnd
+ inclusive_end = InclusiveEnd,
+ group_level = GroupLevel
} = Args,
StartKeyOpts = if StartKey == undefined -> []; true ->
@@ -160,10 +284,33 @@ mrargs_to_fdb_options(Args) ->
[{end_key, {EndKey, EndKeyDocId}}]
end,
+ GroupFunOpt = make_group_key_fun(ViewType, GroupLevel),
+
[
{dir, Direction},
{inclusive_end, InclusiveEnd}
- ] ++ StartKeyOpts ++ EndKeyOpts.
+ ] ++ StartKeyOpts ++ EndKeyOpts ++ GroupFunOpt.
+
+
+% Build the group_key_fun fold option for a view read.
+%
+%   map views          -> no option at all
+%   red, exact         -> group by the full key
+%   red, 0             -> the atom group_all
+%   red, N (N > 0)     -> group list keys by their first N elements;
+%                         non-list keys are grouped by the key itself
+make_group_key_fun(map, _GroupLevel) ->
+    [];
+
+make_group_key_fun(red, exact) ->
+    ExactKey = fun({Key, _DocId}) -> Key end,
+    [{group_key_fun, ExactKey}];
+
+make_group_key_fun(red, 0) ->
+    [{group_key_fun, group_all}];
+
+make_group_key_fun(red, Level) when is_integer(Level), Level > 0 ->
+    KeyPrefix = fun({Key, _DocId}) ->
+        case is_list(Key) of
+            true -> lists:sublist(Key, Level);
+            false -> Key
+        end
+    end,
+    [{group_key_fun, KeyPrefix}].
maybe_stop({ok, Acc}) -> Acc;