You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by da...@apache.org on 2020/08/05 19:22:40 UTC
[couchdb] branch prototype/fdb-layer-ebtree-views updated (3ba9b37
-> d2c2dab)
This is an automated email from the ASF dual-hosted git repository.
davisp pushed a change to branch prototype/fdb-layer-ebtree-views
in repository https://gitbox.apache.org/repos/asf/couchdb.git.
discard 3ba9b37 Add test suite for reduce views
discard ad6fc0c Use ebtree for reduce functions
discard 9a9fabe Views on ebtree
new 98f5573 Views on ebtree
new dd3d0ca Use ebtree for reduce functions
new d2c2dab Add test suite for reduce views
This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version. This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:
* -- * -- B -- O -- O -- O (3ba9b37)
\
N -- N -- N refs/heads/prototype/fdb-layer-ebtree-views (d2c2dab)
You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.
Any revisions marked "omit" are not gone; other references still
refer to them. Any revisions marked "discard" are gone forever.
The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
src/couch_views/src/couch_views_indexer.erl | 3 +--
src/couch_views/src/couch_views_updater.erl | 6 +++---
2 files changed, 4 insertions(+), 5 deletions(-)
[couchdb] 01/03: Views on ebtree
Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
davisp pushed a commit to branch prototype/fdb-layer-ebtree-views
in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit 98f5573867a925e31b9153b496527ce5290a459b
Author: Paul J. Davis <pa...@gmail.com>
AuthorDate: Fri Jul 24 10:59:05 2020 -0500
Views on ebtree
---
src/couch_views/include/couch_views.hrl | 5 +
src/couch_views/src/couch_views.erl | 14 +-
src/couch_views/src/couch_views_fdb.erl | 500 +++++++++++-----------
src/couch_views/src/couch_views_indexer.erl | 45 +-
src/couch_views/src/couch_views_reader.erl | 115 ++---
src/couch_views/src/couch_views_updater.erl | 13 +-
src/couch_views/src/couch_views_util.erl | 35 ++
src/couch_views/test/couch_views_cleanup_test.erl | 2 +-
src/couch_views/test/couch_views_indexer_test.erl | 30 +-
src/couch_views/test/couch_views_size_test.erl | 25 +-
src/couch_views/test/couch_views_updater_test.erl | 2 +-
11 files changed, 411 insertions(+), 375 deletions(-)
diff --git a/src/couch_views/include/couch_views.hrl b/src/couch_views/include/couch_views.hrl
index 3d0110f..3882191 100644
--- a/src/couch_views/include/couch_views.hrl
+++ b/src/couch_views/include/couch_views.hrl
@@ -13,6 +13,7 @@
% Index info/data subspaces
-define(VIEW_INFO, 0).
-define(VIEW_DATA, 1).
+-define(VIEW_TREES, 3).
% Index info keys
-define(VIEW_UPDATE_SEQ, 0).
@@ -25,6 +26,10 @@
-define(VIEW_ID_RANGE, 0).
-define(VIEW_MAP_RANGE, 1).
+% Tree keys
+-define(VIEW_ID_TREE, 0).
+-define(VIEW_ROW_TREES, 1).
+
% jobs api
-define(INDEX_JOB_TYPE, <<"views">>).
diff --git a/src/couch_views/src/couch_views.erl b/src/couch_views/src/couch_views.erl
index d9ba0c1..f6e163a 100644
--- a/src/couch_views/src/couch_views.erl
+++ b/src/couch_views/src/couch_views.erl
@@ -100,9 +100,10 @@ get_info(Db, DDoc) ->
{ok, Mrst} = couch_views_util:ddoc_to_mrst(DbName, DDoc),
Sig = fabric2_util:to_hex(Mrst#mrst.sig),
{UpdateSeq, DataSize, Status} = fabric2_fdb:transactional(Db, fun(TxDb) ->
- Seq = couch_views_fdb:get_update_seq(TxDb, Mrst),
- DataSize = get_total_view_size(TxDb, Mrst),
- JobStatus = case couch_views_jobs:job_state(TxDb, Mrst) of
+ Mrst1 = couch_views_fdb:set_trees(TxDb, Mrst),
+ Seq = couch_views_fdb:get_update_seq(TxDb, Mrst1),
+ DataSize = get_total_view_size(TxDb, Mrst1),
+ JobStatus = case couch_views_jobs:job_state(TxDb, Mrst1) of
{ok, pending} -> true;
{ok, running} -> true;
{ok, finished} -> false;
@@ -124,10 +125,9 @@ get_info(Db, DDoc) ->
get_total_view_size(TxDb, Mrst) ->
- ViewIds = [View#mrview.id_num || View <- Mrst#mrst.views],
- lists:foldl(fun (ViewId, Total) ->
- Total + couch_views_fdb:get_kv_size(TxDb, Mrst, ViewId)
- end, 0, ViewIds).
+ lists:foldl(fun(View, Total) ->
+ Total + couch_views_fdb:get_kv_size(TxDb, View)
+ end, 0, Mrst#mrst.views).
read_view(Db, Mrst, ViewName, Callback, Acc0, Args) ->
diff --git a/src/couch_views/src/couch_views_fdb.erl b/src/couch_views/src/couch_views_fdb.erl
index c957222..9fe9a89 100644
--- a/src/couch_views/src/couch_views_fdb.erl
+++ b/src/couch_views/src/couch_views_fdb.erl
@@ -22,12 +22,14 @@
get_update_seq/2,
set_update_seq/3,
- get_row_count/3,
- get_kv_size/3,
+ set_trees/2,
- fold_map_idx/6,
+ get_row_count/2,
+ get_kv_size/2,
- write_doc/4,
+ fold_map_idx/5,
+
+ write_doc/3,
list_signatures/1,
clear_index/2
@@ -126,92 +128,150 @@ set_update_seq(TxDb, Sig, Seq) ->
ok = erlfdb:set(Tx, seq_key(DbPrefix, Sig), Seq).
-get_row_count(TxDb, #mrst{sig = Sig}, ViewId) ->
- #{
- tx := Tx,
- db_prefix := DbPrefix
- } = TxDb,
-
- case erlfdb:wait(erlfdb:get(Tx, row_count_key(DbPrefix, Sig, ViewId))) of
- not_found -> 0; % Can this happen?
- CountBin -> ?bin2uint(CountBin)
- end.
+set_trees(TxDb, Mrst) ->
+ #mrst{
+ sig = Sig,
+ language = Lang,
+ views = Views
+ } = Mrst,
+ Mrst#mrst{
+ id_btree = open_id_tree(TxDb, Sig),
+ views = [open_view_tree(TxDb, Sig, Lang, V) || V <- Views]
+ }.
-get_kv_size(TxDb, #mrst{sig = Sig}, ViewId) ->
+get_row_count(TxDb, View) ->
#{
- tx := Tx,
- db_prefix := DbPrefix
+ tx := Tx
} = TxDb,
-
- case erlfdb:wait(erlfdb:get(Tx, kv_size_key(DbPrefix, Sig, ViewId))) of
- not_found -> 0; % Can this happen?
- SizeBin -> ?bin2uint(SizeBin)
- end.
+ {Count, _} = ebtree:full_reduce(Tx, View#mrview.btree),
+ Count.
-fold_map_idx(TxDb, Sig, ViewId, Options, Callback, Acc0) ->
+get_kv_size(TxDb, View) ->
#{
- db_prefix := DbPrefix
+ tx := Tx
} = TxDb,
+ {_, TotalSize} = ebtree:full_reduce(Tx, View#mrview.btree),
+ TotalSize.
- MapIdxPrefix = map_idx_prefix(DbPrefix, Sig, ViewId),
- FoldAcc = #{
- prefix => MapIdxPrefix,
- callback => Callback,
- acc => Acc0
- },
- Fun = aegis:wrap_fold_fun(TxDb, fun fold_fwd/2),
+fold_map_idx(TxDb, View, Options, Callback, Acc0) ->
#{
- acc := Acc1
- } = fabric2_fdb:fold_range(TxDb, MapIdxPrefix, Fun, FoldAcc, Options),
-
- Acc1.
+ tx := Tx
+ } = TxDb,
+ #mrview{
+ btree = Btree
+ } = View,
+
+ CollateFun = couch_views_util:collate_fun(View),
+
+ {Dir, StartKey, EndKey, InclusiveEnd} = to_map_opts(Options),
+
+ Wrapper = fun(KVs0, WAcc) ->
+ % Remove any keys that match Start or End key
+ % depending on direction
+ KVs1 = case InclusiveEnd of
+ true ->
+ KVs0;
+ false when Dir == fwd ->
+ lists:filter(fun({K, _V}) ->
+ case CollateFun(K, EndKey) of
+ lt -> true;
+ eq -> false;
+ gt -> false
+ end
+ end, KVs0);
+ false when Dir == rev ->
+ lists:filter(fun({K, _V}) ->
+ case CollateFun(K, EndKey) of
+ lt -> false;
+ eq -> false;
+ gt -> true
+ end
+ end, KVs0)
+ end,
+ % Expand dups
+ KVs2 = lists:flatmap(fun({K, V}) ->
+ case V of
+ {dups, Dups} when Dir == fwd ->
+ [{K, D} || D <- Dups];
+ {dups, Dups} when Dir == rev ->
+ [{K, D} || D <- lists:reverse(Dups)];
+ _ ->
+ [{K, V}]
+ end
+ end, KVs1),
+ lists:foldl(fun({{Key, DocId}, Value}, WAccInner) ->
+ Callback(DocId, Key, Value, WAccInner)
+ end, WAcc, KVs2)
+ end,
+
+ case Dir of
+ fwd ->
+ ebtree:range(Tx, Btree, StartKey, EndKey, Wrapper, Acc0);
+ rev ->
+ % Start/End keys swapped on purpose because ebtree
+ ebtree:reverse_range(Tx, Btree, EndKey, StartKey, Wrapper, Acc0)
+ end.
-write_doc(TxDb, Sig, _ViewIds, #{deleted := true} = Doc) ->
+write_doc(TxDb, Mrst, #{deleted := true} = Doc) ->
+ #{
+ tx := Tx
+ } = TxDb,
#{
id := DocId
} = Doc,
- ExistingViewKeys = get_view_keys(TxDb, Sig, DocId),
+ ExistingViewKeys = get_view_keys(TxDb, Mrst, DocId),
- clear_id_idx(TxDb, Sig, DocId),
- lists:foreach(fun({ViewId, TotalKeys, TotalSize, UniqueKeys}) ->
- clear_map_idx(TxDb, Sig, ViewId, DocId, UniqueKeys),
- update_row_count(TxDb, Sig, ViewId, -TotalKeys),
- update_kv_size(TxDb, Sig, ViewId, -TotalSize)
- end, ExistingViewKeys);
+ ebtree:delete(Tx, Mrst#mrst.id_btree, DocId),
+ lists:foreach(fun(#mrview{id_num = ViewId, btree = Btree}) ->
+ ViewKeys = case lists:keyfind(ViewId, 1, ExistingViewKeys) of
+ {ViewId, Keys} -> Keys;
+ false -> []
+ end,
+ lists:foreach(fun(Key) ->
+ ebtree:delete(Tx, Btree, {Key, DocId})
+ end, ViewKeys)
+ end, Mrst#mrst.views);
-write_doc(TxDb, Sig, ViewIds, Doc) ->
+write_doc(TxDb, Mrst, Doc) ->
+ #{
+ tx := Tx
+ } = TxDb,
#{
id := DocId,
- results := Results,
- kv_sizes := KVSizes
+ results := Results
} = Doc,
- ExistingViewKeys = get_view_keys(TxDb, Sig, DocId),
-
- clear_id_idx(TxDb, Sig, DocId),
+ ExistingViewKeys = get_view_keys(TxDb, Mrst, DocId),
- lists:foreach(fun({ViewId, NewRows, KVSize}) ->
- update_id_idx(TxDb, Sig, ViewId, DocId, NewRows, KVSize),
+ NewIdKeys = lists:foldl(fun({View, RawNewRows}, IdKeyAcc) ->
+ #mrview{
+ id_num = ViewId
+ } = View,
+ % Remove old keys in the view
ExistingKeys = case lists:keyfind(ViewId, 1, ExistingViewKeys) of
- {ViewId, TotalRows, TotalSize, EKeys} ->
- RowChange = length(NewRows) - TotalRows,
- update_row_count(TxDb, Sig, ViewId, RowChange),
- update_kv_size(TxDb, Sig, ViewId, KVSize - TotalSize),
- EKeys;
- false ->
- RowChange = length(NewRows),
- update_row_count(TxDb, Sig, ViewId, RowChange),
- update_kv_size(TxDb, Sig, ViewId, KVSize),
- []
+ {ViewId, Keys} -> Keys;
+ false -> []
end,
- update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows)
- end, lists:zip3(ViewIds, Results, KVSizes)).
+ lists:foreach(fun(K) ->
+ ebtree:delete(Tx, View#mrview.btree, {K, DocId})
+ end, ExistingKeys),
+
+ % Insert new rows
+ NewRows = dedupe_rows(View, RawNewRows),
+ lists:foreach(fun({K, V}) ->
+ ebtree:insert(Tx, View#mrview.btree, {K, DocId}, V)
+ end, NewRows),
+ ViewKeys = {View#mrview.id_num, lists:usort([K || {K, _V} <- NewRows])},
+ [ViewKeys | IdKeyAcc]
+ end, [], lists:zip(Mrst#mrst.views, Results)),
+
+ ebtree:insert(Tx, Mrst#mrst.id_btree, DocId, NewIdKeys).
list_signatures(Db) ->
@@ -244,200 +304,163 @@ clear_index(Db, Signature) ->
end, Keys),
% Clear index data
- RangeTuple = {?DB_VIEWS, ?VIEW_DATA, Signature},
- RangePrefix = erlfdb_tuple:pack(RangeTuple, DbPrefix),
- erlfdb:clear_range_startswith(Tx, RangePrefix).
-
-
-% For each row in a map view we store the the key/value
-% in FoundationDB:
-%
-% `(EncodedSortKey, (EncodedKey, EncodedValue))`
-%
-% The difference between `EncodedSortKey` and `EndcodedKey` is
-% the use of `couch_util:get_sort_key/1` which turns UTF-8
-% strings into binaries that are byte comparable. Given a sort
-% key binary we cannot recover the input so to return unmodified
-% user data we are forced to store the original.
-
-fold_fwd({RowKey, PackedKeyValue}, Acc) ->
- #{
- prefix := Prefix,
- callback := UserCallback,
- acc := UserAcc0
- } = Acc,
-
- {{_SortKey, DocId}, _DupeId} =
- erlfdb_tuple:unpack(RowKey, Prefix),
-
- {EncodedOriginalKey, EncodedValue} = erlfdb_tuple:unpack(PackedKeyValue),
- Value = couch_views_encoding:decode(EncodedValue),
- Key = couch_views_encoding:decode(EncodedOriginalKey),
-
- UserAcc1 = UserCallback(DocId, Key, Value, UserAcc0),
-
- Acc#{
- acc := UserAcc1
- }.
-
-
-clear_id_idx(TxDb, Sig, DocId) ->
- #{
- tx := Tx,
- db_prefix := DbPrefix
- } = TxDb,
-
- {Start, End} = id_idx_range(DbPrefix, Sig, DocId),
- ok = erlfdb:clear_range(Tx, Start, End).
-
-
-clear_map_idx(TxDb, Sig, ViewId, DocId, ViewKeys) ->
- #{
- tx := Tx,
- db_prefix := DbPrefix
- } = TxDb,
-
- lists:foreach(fun(ViewKey) ->
- {Start, End} = map_idx_range(DbPrefix, Sig, ViewId, ViewKey, DocId),
- ok = erlfdb:clear_range(Tx, Start, End)
- end, ViewKeys).
-
-
-update_id_idx(TxDb, Sig, ViewId, DocId, [], _KVSize) ->
- #{
- tx := Tx,
- db_prefix := DbPrefix
- } = TxDb,
- Key = id_idx_key(DbPrefix, Sig, DocId, ViewId),
- ok = erlfdb:clear(Tx, Key);
-
-update_id_idx(TxDb, Sig, ViewId, DocId, NewRows, KVSize) ->
- #{
- tx := Tx,
- db_prefix := DbPrefix
- } = TxDb,
-
- Unique = lists:usort([K || {K, _V} <- NewRows]),
+ DataTuple = {?DB_VIEWS, ?VIEW_DATA, Signature},
+ DataPrefix = erlfdb_tuple:pack(DataTuple, DbPrefix),
+ erlfdb:clear_range_startswith(Tx, DataPrefix),
- Key = id_idx_key(DbPrefix, Sig, DocId, ViewId),
- Val = couch_views_encoding:encode([length(NewRows), KVSize, Unique]),
- ok = erlfdb:set(Tx, Key, aegis:encrypt(TxDb, Key, Val)).
+ % Clear tree data
+ TreeTuple = {?DB_VIEWS, ?VIEW_TREES, Signature},
+ TreePrefix = erlfdb_tuple:pack(TreeTuple, DbPrefix),
+ erlfdb:clear_range_startswith(Tx, TreePrefix).
-update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows) ->
+get_view_keys(TxDb, Mrst, DocId) ->
#{
- tx := Tx,
- db_prefix := DbPrefix
- } = TxDb,
-
- lists:foreach(fun(RemKey) ->
- {Start, End} = map_idx_range(DbPrefix, Sig, ViewId, RemKey, DocId),
- ok = erlfdb:clear_range(Tx, Start, End)
- end, ExistingKeys),
-
- KVsToAdd = process_rows(NewRows),
- MapIdxPrefix = map_idx_prefix(DbPrefix, Sig, ViewId),
-
- lists:foreach(fun({DupeId, Key1, Key2, EV}) ->
- KK = map_idx_key(MapIdxPrefix, {Key1, DocId}, DupeId),
- Val = erlfdb_tuple:pack({Key2, EV}),
- ok = erlfdb:set(Tx, KK, aegis:encrypt(TxDb, KK, Val))
- end, KVsToAdd).
-
-
-get_view_keys(TxDb, Sig, DocId) ->
- #{
- tx := Tx,
- db_prefix := DbPrefix
+ tx := Tx
} = TxDb,
- {Start, End} = id_idx_range(DbPrefix, Sig, DocId),
- lists:map(fun({K, V}) ->
- {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_ID_RANGE, DocId, ViewId} =
- erlfdb_tuple:unpack(K, DbPrefix),
- [TotalKeys, TotalSize, UniqueKeys] = couch_views_encoding:decode(V),
- {ViewId, TotalKeys, TotalSize, UniqueKeys}
- end, aegis:decrypt(TxDb, erlfdb:get_range(Tx, Start, End, []))).
+ #mrst{
+ id_btree = IdTree
+ } = Mrst,
+ case ebtree:lookup(Tx, IdTree, DocId) of
+ {DocId, ViewKeys} -> ViewKeys;
+ false -> []
+ end.
-update_row_count(TxDb, Sig, ViewId, Increment) ->
+open_id_tree(TxDb, Sig) ->
#{
tx := Tx,
db_prefix := DbPrefix
} = TxDb,
- Key = row_count_key(DbPrefix, Sig, ViewId),
- erlfdb:add(Tx, Key, Increment).
+ Prefix = id_tree_prefix(DbPrefix, Sig),
+ ebtree:open(Tx, Prefix, get_order(id_btree), []).
-update_kv_size(TxDb, Sig, ViewId, Increment) ->
+open_view_tree(TxDb, Sig, _Lang, View) ->
#{
tx := Tx,
db_prefix := DbPrefix
} = TxDb,
-
- % Track a view specific size for calls to
- % GET /dbname/_design/doc/_info`
- IdxKey = kv_size_key(DbPrefix, Sig, ViewId),
- erlfdb:add(Tx, IdxKey, Increment),
-
- % Track a database level rollup for calls to
- % GET /dbname
- DbKey = db_kv_size_key(DbPrefix),
- erlfdb:add(Tx, DbKey, Increment).
+ #mrview{
+ id_num = ViewId
+ } = View,
+ Prefix = view_tree_prefix(DbPrefix, Sig, ViewId),
+ TreeOpts = [
+ {collate_fun, couch_views_util:collate_fun(View)},
+ {reduce_fun, make_reduce_fun(View)}
+ ],
+ View#mrview{
+ btree = ebtree:open(Tx, Prefix, get_order(view_btree), TreeOpts)
+ }.
-seq_key(DbPrefix, Sig) ->
- Key = {?DB_VIEWS, ?VIEW_INFO, ?VIEW_UPDATE_SEQ, Sig},
- erlfdb_tuple:pack(Key, DbPrefix).
+get_order(id_btree) ->
+ config:get_integer("couch_views", "id_btree_node_size", 100);
+get_order(view_btree) ->
+ config:get_integer("couch_views", "view_btree_node_size", 100).
+
+
+make_reduce_fun(#mrview{}) ->
+ fun
+ (KVs, _ReReduce = false) ->
+ TotalSize = lists:foldl(fun({K, V}, Acc) ->
+ KSize = couch_ejson_size:encoded_size(K),
+ VSize = case V of
+ {dups, Dups} ->
+ lists:foldl(fun(D, DAcc) ->
+ DAcc + couch_ejson_size:encoded_size(D)
+ end, 0, Dups);
+ _ ->
+ couch_ejson_size:encoded_size(V)
+ end,
+ KSize + VSize + Acc
+ end, 0, KVs),
+ {length(KVs), TotalSize};
+ (KRs, _ReReduce = true) ->
+ lists:foldl(fun({Count, Size}, {CountAcc, SizeAcc}) ->
+ {Count + CountAcc, Size + SizeAcc}
+ end, {0, 0}, KRs)
+ end.
-row_count_key(DbPrefix, Sig, ViewId) ->
- Key = {?DB_VIEWS, ?VIEW_INFO, ?VIEW_ROW_COUNT, Sig, ViewId},
- erlfdb_tuple:pack(Key, DbPrefix).
+to_map_opts(Options) ->
+ Dir = case lists:keyfind(dir, 1, Options) of
+ {dir, D} -> D;
+ _ -> fwd
+ end,
+
+ InclusiveEnd = case lists:keyfind(inclusive_end, 1, Options) of
+ {inclusive_end, IE} -> IE;
+ _ -> true
+ end,
+
+ StartKey = case lists:keyfind(start_key, 1, Options) of
+ {start_key, SK} -> SK;
+ false when Dir == fwd -> ebtree:min();
+ false when Dir == rev -> ebtree:max()
+ end,
+
+ EndKey = case lists:keyfind(end_key, 1, Options) of
+ {end_key, EK} -> EK;
+ false when Dir == fwd -> ebtree:max();
+ false when Dir == rev -> ebtree:min()
+ end,
+
+ {Dir, StartKey, EndKey, InclusiveEnd}.
+
+
+dedupe_rows(View, KVs0) ->
+ CollateFun = couch_views_util:collate_fun(View),
+ KVs1 = lists:sort(fun({KeyA, ValA}, {KeyB, ValB}) ->
+ case CollateFun({KeyA, <<>>}, {KeyB, <<>>}) of
+ lt -> true;
+ eq -> ValA =< ValB;
+ gt -> false
+ end
+ end, KVs0),
+ dedupe_rows_int(CollateFun, KVs1).
+
+
+dedupe_rows_int(_CollateFun, []) ->
+ [];
+
+dedupe_rows_int(_CollateFun, [KV]) ->
+ [KV];
+
+dedupe_rows_int(CollateFun, [{K1, V1} | RestKVs]) ->
+ RestDeduped = dedupe_rows_int(CollateFun, RestKVs),
+ case RestDeduped of
+ [{K2, V2} | RestRestDeduped] ->
+ case CollateFun({K1, <<>>}, {K2, <<>>}) of
+ eq -> [{K1, combine_vals(V1, V2)} | RestRestDeduped];
+ _ -> [{K1, V1} | RestDeduped]
+ end;
+ [] ->
+ [{K1, V1}]
+ end.
-kv_size_key(DbPrefix, Sig, ViewId) ->
- Key = {?DB_VIEWS, ?VIEW_INFO, ?VIEW_KV_SIZE, Sig, ViewId},
- erlfdb_tuple:pack(Key, DbPrefix).
+combine_vals(V1, {dups, V2}) ->
+ {dups, [V1 | V2]};
+combine_vals(V1, V2) ->
+ {dups, [V1, V2]}.
-db_kv_size_key(DbPrefix) ->
- Key = {?DB_STATS, <<"sizes">>, <<"views">>},
+id_tree_prefix(DbPrefix, Sig) ->
+ Key = {?DB_VIEWS, ?VIEW_TREES, Sig, ?VIEW_ID_TREE},
erlfdb_tuple:pack(Key, DbPrefix).
-id_idx_key(DbPrefix, Sig, DocId, ViewId) ->
- Key = {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_ID_RANGE, DocId, ViewId},
+view_tree_prefix(DbPrefix, Sig, ViewId) ->
+ Key = {?DB_VIEWS, ?VIEW_TREES, Sig, ?VIEW_ROW_TREES, ViewId},
erlfdb_tuple:pack(Key, DbPrefix).
-id_idx_range(DbPrefix, Sig, DocId) ->
- Key = {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_ID_RANGE, DocId},
- erlfdb_tuple:range(Key, DbPrefix).
-
-
-map_idx_prefix(DbPrefix, Sig, ViewId) ->
- Key = {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_MAP_RANGE, ViewId},
+seq_key(DbPrefix, Sig) ->
+ Key = {?DB_VIEWS, ?VIEW_INFO, ?VIEW_UPDATE_SEQ, Sig},
erlfdb_tuple:pack(Key, DbPrefix).
-map_idx_key(MapIdxPrefix, MapKey, DupeId) ->
- Key = {MapKey, DupeId},
- erlfdb_tuple:pack(Key, MapIdxPrefix).
-
-
-map_idx_range(DbPrefix, Sig, ViewId, MapKey, DocId) ->
- Encoded = couch_views_encoding:encode(MapKey, key),
- Key = {
- ?DB_VIEWS,
- ?VIEW_DATA,
- Sig,
- ?VIEW_MAP_RANGE,
- ViewId,
- {Encoded, DocId}
- },
- erlfdb_tuple:range(Key, DbPrefix).
-
-
creation_vs_key(Db, Sig) ->
#{
db_prefix := DbPrefix
@@ -454,22 +477,15 @@ build_status_key(Db, Sig) ->
erlfdb_tuple:pack(Key, DbPrefix).
-process_rows(Rows) ->
- Encoded = lists:map(fun({K, V}) ->
- EK1 = couch_views_encoding:encode(K, key),
- EK2 = couch_views_encoding:encode(K, value),
- EV = couch_views_encoding:encode(V, value),
- {EK1, EK2, EV}
- end, Rows),
-
- Grouped = lists:foldl(fun({K1, K2, V}, Acc) ->
- dict:append(K1, {K2, V}, Acc)
- end, dict:new(), Encoded),
-
- dict:fold(fun(K1, Vals, DAcc) ->
- Vals1 = lists:keysort(2, Vals),
- {_, Labeled} = lists:foldl(fun({K2, V}, {Count, Acc}) ->
- {Count + 1, [{Count, K1, K2, V} | Acc]}
- end, {0, []}, Vals1),
- Labeled ++ DAcc
- end, [], Grouped).
+-ifdef(TEST).
+-include_lib("eunit/include/eunit.hrl").
+
+dedupe_basic_test() ->
+ View = #mrview{},
+ ?assertEqual([{1, 1}], dedupe_rows(View, [{1, 1}])).
+
+dedupe_simple_test() ->
+ View = #mrview{},
+ ?assertEqual([{1, {dups, [1, 2]}}], dedupe_rows(View, [{1, 1}, {1, 2}])).
+
+-endif.
diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl
index 9c8be6f..50120c6 100644
--- a/src/couch_views/src/couch_views_indexer.erl
+++ b/src/couch_views/src/couch_views_indexer.erl
@@ -184,6 +184,7 @@ update(#{} = Db, Mrst0, State0) ->
do_update(Db, Mrst0, State0) ->
fabric2_fdb:transactional(Db, fun(TxDb) ->
+ Mrst1 = couch_views_fdb:set_trees(TxDb, Mrst0),
State1 = get_update_start_state(TxDb, Mrst0, State0),
{ok, State2} = fold_changes(State1),
@@ -201,8 +202,8 @@ do_update(Db, Mrst0, State0) ->
DocAcc1 = fetch_docs(TxDb, DesignOpts, DocAcc),
couch_rate:in(Limiter, Count),
- {Mrst1, MappedDocs} = map_docs(Mrst0, DocAcc1),
- WrittenDocs = write_docs(TxDb, Mrst1, MappedDocs, State2),
+ {Mrst2, MappedDocs} = map_docs(Mrst1, DocAcc1),
+ WrittenDocs = write_docs(TxDb, Mrst2, MappedDocs, State2),
ChangesDone = ChangesDone0 + WrittenDocs,
@@ -210,14 +211,14 @@ do_update(Db, Mrst0, State0) ->
case Count < Limit of
true ->
- maybe_set_build_status(TxDb, Mrst1, ViewVS,
+ maybe_set_build_status(TxDb, Mrst2, ViewVS,
?INDEX_READY),
report_progress(State2#{changes_done := ChangesDone},
finished),
- {Mrst1, finished};
+ {Mrst2, finished};
false ->
State3 = report_progress(State2, update),
- {Mrst1, State3#{
+ {Mrst2, State3#{
tx_db := undefined,
count := 0,
doc_acc := [],
@@ -356,7 +357,6 @@ map_docs(Mrst, Docs) ->
write_docs(TxDb, Mrst, Docs, State) ->
#mrst{
- views = Views,
sig = Sig
} = Mrst,
@@ -364,20 +364,19 @@ write_docs(TxDb, Mrst, Docs, State) ->
last_seq := LastSeq
} = State,
- ViewIds = [View#mrview.id_num || View <- Views],
KeyLimit = key_size_limit(),
ValLimit = value_size_limit(),
- DocsNumber = lists:foldl(fun(Doc0, N) ->
- Doc1 = calculate_kv_sizes(Mrst, Doc0, KeyLimit, ValLimit),
- couch_views_fdb:write_doc(TxDb, Sig, ViewIds, Doc1),
- N + 1
- end, 0, Docs),
+ lists:foreach(fun(Doc0) ->
+ Doc1 = check_kv_size_limit(Mrst, Doc0, KeyLimit, ValLimit),
+ couch_views_fdb:write_doc(TxDb, Mrst, Doc1)
+ end, Docs),
if LastSeq == false -> ok; true ->
couch_views_fdb:set_update_seq(TxDb, Sig, LastSeq)
end,
- DocsNumber.
+
+ length(Docs).
fetch_docs(Db, DesignOpts, Changes) ->
@@ -462,7 +461,7 @@ start_query_server(#mrst{} = Mrst) ->
Mrst.
-calculate_kv_sizes(Mrst, Doc, KeyLimit, ValLimit) ->
+check_kv_size_limit(Mrst, Doc, KeyLimit, ValLimit) ->
#mrst{
db_name = DbName,
idx_name = IdxName
@@ -471,10 +470,10 @@ calculate_kv_sizes(Mrst, Doc, KeyLimit, ValLimit) ->
results := Results
} = Doc,
try
- KVSizes = lists:map(fun(ViewRows) ->
- lists:foldl(fun({K, V}, Acc) ->
- KeySize = erlang:external_size(K),
- ValSize = erlang:external_size(V),
+ lists:foreach(fun(ViewRows) ->
+ lists:foreach(fun({K, V}) ->
+ KeySize = couch_ejson_size:encoded_size(K),
+ ValSize = couch_ejson_size:encoded_size(V),
if KeySize =< KeyLimit -> ok; true ->
throw({size_error, key})
@@ -482,12 +481,10 @@ calculate_kv_sizes(Mrst, Doc, KeyLimit, ValLimit) ->
if ValSize =< ValLimit -> ok; true ->
throw({size_error, value})
- end,
-
- Acc + KeySize + ValSize
- end, 0, ViewRows)
+ end
+ end, ViewRows)
end, Results),
- Doc#{kv_sizes => KVSizes}
+ Doc
catch throw:{size_error, Type} ->
#{id := DocId} = Doc,
Fmt = "View ~s size error for docid `~s`, excluded from indexing "
@@ -572,4 +569,4 @@ key_size_limit() ->
value_size_limit() ->
- config:get_integer("couch_views", "value_size_limit", ?VALUE_SIZE_LIMIT).
\ No newline at end of file
+ config:get_integer("couch_views", "value_size_limit", ?VALUE_SIZE_LIMIT).
diff --git a/src/couch_views/src/couch_views_reader.erl b/src/couch_views/src/couch_views_reader.erl
index ce7f163..6342f61 100644
--- a/src/couch_views/src/couch_views_reader.erl
+++ b/src/couch_views/src/couch_views_reader.erl
@@ -23,24 +23,24 @@
-include_lib("fabric/include/fabric2.hrl").
-read(Db, Mrst, ViewName, UserCallback, UserAcc0, Args) ->
- #mrst{
- language = Lang,
- sig = Sig,
- views = Views
- } = Mrst,
-
- ViewId = get_view_id(Lang, Args, ViewName, Views),
- Fun = fun handle_row/4,
-
+read(Db, Mrst0, ViewName, UserCallback, UserAcc0, Args) ->
try
fabric2_fdb:transactional(Db, fun(TxDb) ->
- Meta = get_meta(TxDb, Mrst, ViewId, Args),
+ #mrst{
+ language = Lang,
+ views = Views
+ } = Mrst = couch_views_fdb:set_trees(TxDb, Mrst0),
+
+ View = get_map_view(Lang, Args, ViewName, Views),
+ Fun = fun handle_map_row/4,
+
+ Meta = get_map_meta(TxDb, Mrst, View, Args),
UserAcc1 = maybe_stop(UserCallback(Meta, UserAcc0)),
Acc0 = #{
db => TxDb,
skip => Args#mrargs.skip,
+ limit => Args#mrargs.limit,
mrargs => undefined,
callback => UserCallback,
acc => UserAcc1
@@ -51,14 +51,7 @@ read(Db, Mrst, ViewName, UserCallback, UserAcc0, Args) ->
KeyAcc1 = KeyAcc0#{
mrargs := KeyArgs
},
- couch_views_fdb:fold_map_idx(
- TxDb,
- Sig,
- ViewId,
- Opts,
- Fun,
- KeyAcc1
- )
+ couch_views_fdb:fold_map_idx(TxDb, View, Opts, Fun, KeyAcc1)
end, Acc0, expand_keys_args(Args)),
#{
@@ -66,27 +59,35 @@ read(Db, Mrst, ViewName, UserCallback, UserAcc0, Args) ->
} = Acc1,
{ok, maybe_stop(UserCallback(complete, UserAcc2))}
end)
- catch throw:{done, Out} ->
- {ok, Out}
+ catch
+ throw:{complete, Out} ->
+ {_, Final} = UserCallback(complete, Out),
+ {ok, Final};
+ throw:{done, Out} ->
+ {ok, Out}
end.
-get_meta(TxDb, Mrst, ViewId, #mrargs{update_seq = true}) ->
- TotalRows = couch_views_fdb:get_row_count(TxDb, Mrst, ViewId),
+get_map_meta(TxDb, Mrst, View, #mrargs{update_seq = true}) ->
+ TotalRows = couch_views_fdb:get_row_count(TxDb, View),
ViewSeq = couch_views_fdb:get_update_seq(TxDb, Mrst),
{meta, [{update_seq, ViewSeq}, {total, TotalRows}, {offset, null}]};
-get_meta(TxDb, Mrst, ViewId, #mrargs{}) ->
- TotalRows = couch_views_fdb:get_row_count(TxDb, Mrst, ViewId),
+get_map_meta(TxDb, _Mrst, View, #mrargs{}) ->
+ TotalRows = couch_views_fdb:get_row_count(TxDb, View),
{meta, [{total, TotalRows}, {offset, null}]}.
-handle_row(_DocId, _Key, _Value, #{skip := Skip} = Acc) when Skip > 0 ->
+handle_map_row(_DocId, _Key, _Value, #{skip := Skip} = Acc) when Skip > 0 ->
Acc#{skip := Skip - 1};
-handle_row(DocId, Key, Value, Acc) ->
+handle_map_row(_DocID, _Key, _Value, #{limit := 0, acc := UserAcc}) ->
+ throw({complete, UserAcc});
+
+handle_map_row(DocId, Key, Value, Acc) ->
#{
db := TxDb,
+ limit := Limit,
mrargs := Args,
callback := UserCallback,
acc := UserAcc0
@@ -111,13 +112,13 @@ handle_row(DocId, Key, Value, Acc) ->
end,
UserAcc1 = maybe_stop(UserCallback({row, Row}, UserAcc0)),
- Acc#{acc := UserAcc1}.
+ Acc#{limit := Limit - 1, acc := UserAcc1}.
-get_view_id(Lang, Args, ViewName, Views) ->
+get_map_view(Lang, Args, ViewName, Views) ->
case couch_mrview_util:extract_view(Lang, Args, ViewName, Views) of
- {map, View, _Args} -> View#mrview.id_num;
- {red, {_Idx, _Lang, View}} -> View#mrview.id_num
+ {map, View, _Args} -> View;
+ {red, {_Idx, _Lang, View}, _} -> View
end.
@@ -135,57 +136,33 @@ expand_keys_args(#mrargs{keys = Keys} = Args) ->
mrargs_to_fdb_options(Args) ->
#mrargs{
- start_key = StartKey0,
+ start_key = StartKey,
start_key_docid = StartKeyDocId,
- end_key = EndKey0,
- end_key_docid = EndKeyDocId,
+ end_key = EndKey,
+ end_key_docid = EndKeyDocId0,
direction = Direction,
- limit = Limit,
- skip = Skip,
inclusive_end = InclusiveEnd
} = Args,
- StartKey1 = if StartKey0 == undefined -> undefined; true ->
- couch_views_encoding:encode(StartKey0, key)
- end,
-
- StartKeyOpts = case {StartKey1, StartKeyDocId} of
- {undefined, _} ->
- [];
- {StartKey1, StartKeyDocId} ->
- [{start_key, {StartKey1, StartKeyDocId}}]
+ StartKeyOpts = if StartKey == undefined -> []; true ->
+ [{start_key, {StartKey, StartKeyDocId}}]
end,
- EndKey1 = if EndKey0 == undefined -> undefined; true ->
- couch_views_encoding:encode(EndKey0, key)
+ EndKeyDocId = case {Direction, EndKeyDocId0} of
+ {fwd, <<255>>} when InclusiveEnd -> <<255>>;
+ {fwd, <<255>>} when not InclusiveEnd -> <<>>;
+ {rev, <<>>} when InclusiveEnd -> <<>>;
+ {rev, <<>>} when not InclusiveEnd -> <<255>>;
+ _ -> EndKeyDocId0
end,
- EndKeyOpts = case {EndKey1, EndKeyDocId, Direction} of
- {undefined, _, _} ->
- [];
- {EndKey1, <<>>, rev} when not InclusiveEnd ->
- % When we iterate in reverse with
- % inclusive_end=false we have to set the
- % EndKeyDocId to <<255>> so that we don't
- % include matching rows.
- [{end_key_gt, {EndKey1, <<255>>}}];
- {EndKey1, <<255>>, _} when not InclusiveEnd ->
- % When inclusive_end=false we need to
- % elide the default end_key_docid so as
- % to not sort past the docids with the
- % given end key.
- [{end_key_gt, {EndKey1}}];
- {EndKey1, EndKeyDocId, _} when not InclusiveEnd ->
- [{end_key_gt, {EndKey1, EndKeyDocId}}];
- {EndKey1, EndKeyDocId, _} when InclusiveEnd ->
- [{end_key, {EndKey1, EndKeyDocId}}]
+ EndKeyOpts = if EndKey == undefined -> []; true ->
+ [{end_key, {EndKey, EndKeyDocId}}]
end,
[
{dir, Direction},
- {limit, Limit + Skip},
- {streaming_mode, want_all},
- {restart_tx, true}
+ {inclusive_end, InclusiveEnd}
] ++ StartKeyOpts ++ EndKeyOpts.
diff --git a/src/couch_views/src/couch_views_updater.erl b/src/couch_views/src/couch_views_updater.erl
index ba9fadb..8835b6a 100644
--- a/src/couch_views/src/couch_views_updater.erl
+++ b/src/couch_views/src/couch_views_updater.erl
@@ -87,16 +87,17 @@ write_doc(Db, #doc{deleted = Deleted} = Doc) ->
},
lists:foreach(fun(DDoc) ->
- {ok, Mrst} = couch_mrview_util:ddoc_to_mrst(DbName, DDoc),
+ {ok, Mrst0} = couch_mrview_util:ddoc_to_mrst(DbName, DDoc),
+ Mrst1 = couch_views_fdb:set_trees(Db, Mrst0),
- case should_index_doc(Doc, Mrst) of
+ case should_index_doc(Doc, Mrst1) of
true ->
- {Mrst1, Result1} = couch_views_indexer:map_docs(Mrst, Result0),
- DocNumber = couch_views_indexer:write_docs(Db, Mrst1,
+ {Mrst2, Result1} = couch_views_indexer:map_docs(Mrst1, Result0),
+ DocNumber = couch_views_indexer:write_docs(Db, Mrst2,
Result1, State),
- couch_views_plugin:after_interactive_write(Db, Mrst1,
+ couch_views_plugin:after_interactive_write(Db, Mrst2,
Result1, DocNumber),
- couch_eval:release_map_context(Mrst1#mrst.qserver);
+ couch_eval:release_map_context(Mrst2#mrst.qserver);
false ->
ok
end
diff --git a/src/couch_views/src/couch_views_util.erl b/src/couch_views/src/couch_views_util.erl
index 11bba75..34f8719 100644
--- a/src/couch_views/src/couch_views_util.erl
+++ b/src/couch_views/src/couch_views_util.erl
@@ -15,6 +15,7 @@
-export([
ddoc_to_mrst/2,
+ collate_fun/1,
validate_args/1,
validate_args/2,
is_paginated/1,
@@ -82,6 +83,40 @@ ddoc_to_mrst(DbName, #doc{id=Id, body={Fields}}) ->
{ok, IdxState#mrst{sig=couch_hash:md5_hash(term_to_binary(SigInfo))}}.
+collate_fun(View) ->
+ #mrview{
+ options = Options
+ } = View,
+ case couch_util:get_value(<<"collation">>, Options) of
+ <<"raw">> -> fun collate_raw/2;
+ _ -> fun collate_rows/2
+ end.
+
+
+collate_raw(A, A) -> eq;
+collate_raw(A, B) when A < B -> lt;
+collate_raw(A, B) when A > B -> gt.
+
+
+collate_rows({KeyA, DocIdA}, {KeyB, DocIdB}) ->
+ case couch_ejson_compare:less(KeyA, KeyB) of
+ -1 -> lt;
+ 0 when DocIdA < DocIdB -> lt;
+ 0 when DocIdA == DocIdB -> eq;
+ 0 -> gt; % when DocIdA > DocIdB
+ 1 -> gt
+ end;
+
+collate_rows(KeyA, KeyB) ->
+ % When collating reduce group keys they don't
+ % come with a docid.
+ case couch_ejson_compare:less(KeyA, KeyB) of
+ -1 -> lt;
+ 0 -> eq;
+ 1 -> gt
+ end.
+
+
validate_args(Args) ->
validate_args(Args, []).
diff --git a/src/couch_views/test/couch_views_cleanup_test.erl b/src/couch_views/test/couch_views_cleanup_test.erl
index e4dcdce..54048c9 100644
--- a/src/couch_views/test/couch_views_cleanup_test.erl
+++ b/src/couch_views/test/couch_views_cleanup_test.erl
@@ -302,7 +302,7 @@ view_has_data(Db, DDoc) ->
SigKey = erlfdb_tuple:pack(SigKeyTuple, DbPrefix),
SigVal = erlfdb:wait(erlfdb:get(Tx, SigKey)),
- RangeKeyTuple = {?DB_VIEWS, ?VIEW_DATA, Sig},
+ RangeKeyTuple = {?DB_VIEWS, ?VIEW_TREES, Sig},
RangeKey = erlfdb_tuple:pack(RangeKeyTuple, DbPrefix),
Range = erlfdb:wait(erlfdb:get_range_startswith(Tx, RangeKey)),
diff --git a/src/couch_views/test/couch_views_indexer_test.erl b/src/couch_views/test/couch_views_indexer_test.erl
index cb8378f..ec5645b 100644
--- a/src/couch_views/test/couch_views_indexer_test.erl
+++ b/src/couch_views/test/couch_views_indexer_test.erl
@@ -127,12 +127,12 @@ updated_docs_are_reindexed(Db) ->
% Check that our id index is updated properly
% as well.
DbName = fabric2_db:name(Db),
- {ok, Mrst} = couch_views_util:ddoc_to_mrst(DbName, DDoc),
- Sig = Mrst#mrst.sig,
+ {ok, Mrst0} = couch_views_util:ddoc_to_mrst(DbName, DDoc),
fabric2_fdb:transactional(Db, fun(TxDb) ->
- ?assertMatch(
- [{0, 1, _, [1]}],
- couch_views_fdb:get_view_keys(TxDb, Sig, <<"0">>)
+ Mrst1 = couch_views_fdb:set_trees(TxDb, Mrst0),
+ ?assertEqual(
+ [{0, [1]}, {1, []}],
+ lists:sort(couch_views_fdb:get_view_keys(TxDb, Mrst1, <<"0">>))
)
end).
@@ -161,12 +161,12 @@ updated_docs_without_changes_are_reindexed(Db) ->
% Check fdb directly to make sure we've also
% removed the id idx keys properly.
DbName = fabric2_db:name(Db),
- {ok, Mrst} = couch_views_util:ddoc_to_mrst(DbName, DDoc),
- Sig = Mrst#mrst.sig,
+ {ok, Mrst0} = couch_views_util:ddoc_to_mrst(DbName, DDoc),
fabric2_fdb:transactional(Db, fun(TxDb) ->
+ Mrst1 = couch_views_fdb:set_trees(TxDb, Mrst0),
?assertMatch(
- [{0, 1, _, [0]}],
- couch_views_fdb:get_view_keys(TxDb, Sig, <<"0">>)
+ [{0, [0]}, {1, []}],
+ lists:sort(couch_views_fdb:get_view_keys(TxDb, Mrst1, <<"0">>))
)
end).
@@ -209,10 +209,10 @@ deleted_docs_are_unindexed(Db) ->
% Check fdb directly to make sure we've also
% removed the id idx keys properly.
DbName = fabric2_db:name(Db),
- {ok, Mrst} = couch_views_util:ddoc_to_mrst(DbName, DDoc),
- Sig = Mrst#mrst.sig,
+ {ok, Mrst0} = couch_views_util:ddoc_to_mrst(DbName, DDoc),
fabric2_fdb:transactional(Db, fun(TxDb) ->
- ?assertEqual([], couch_views_fdb:get_view_keys(TxDb, Sig, <<"0">>))
+ Mrst1 = couch_views_fdb:set_trees(TxDb, Mrst0),
+ ?assertEqual([], couch_views_fdb:get_view_keys(TxDb, Mrst1, <<"0">>))
end).
@@ -438,8 +438,8 @@ multiple_design_docs(Db) ->
% This is how we check that no index updates took place
meck:new(couch_views_fdb, [passthrough]),
- meck:expect(couch_views_fdb, write_doc, fun(TxDb, Sig, ViewIds, Doc) ->
- meck:passthrough([TxDb, Sig, ViewIds, Doc])
+ meck:expect(couch_views_fdb, write_doc, fun(TxDb, Mrst, Doc) ->
+ meck:passthrough([TxDb, Mrst, Doc])
end),
DDoc1 = create_ddoc(simple, <<"_design/bar1">>),
@@ -466,7 +466,7 @@ multiple_design_docs(Db) ->
meck:reset(couch_views_fdb),
?assertEqual({ok, [row(<<"0">>, 0, 0)]}, run_query(Db, DDoc2, ?MAP_FUN1)),
?assertEqual(ok, wait_job_finished(JobId, 5000)),
- ?assertEqual(0, meck:num_calls(couch_views_fdb, write_doc, 4)),
+ ?assertEqual(0, meck:num_calls(couch_views_fdb, write_doc, 3)),
DDoc2Del = DDoc2#doc{revs = {Pos2, [Rev2]}, deleted = true},
{ok, _} = fabric2_db:update_doc(Db, DDoc2Del, []),
diff --git a/src/couch_views/test/couch_views_size_test.erl b/src/couch_views/test/couch_views_size_test.erl
index 18fa9e6..cc2fe39 100644
--- a/src/couch_views/test/couch_views_size_test.erl
+++ b/src/couch_views/test/couch_views_size_test.erl
@@ -193,16 +193,21 @@ cleanup({Ctx, Db}) ->
create_transition_tests({_Ctx, Db}) ->
- Transitions = generate_transitions(),
- Single = lists:flatmap(fun(T) ->
- Name = lists:flatten(io_lib:format("single ~s", [tname(T)])),
- [{Name, fun() -> check_single_transition(Db, T) end}]
- end, lists:sort(Transitions)),
- Multi = lists:flatmap(fun(T) ->
- Name = lists:flatten(io_lib:format("multi ~s", [tname(T)])),
- [{Name, fun() -> check_multi_transition(Db, T) end}]
- end, lists:sort(group(shuffle(Transitions)))),
- subset(?NUM_SINGLE_TESTS, Single) ++ subset(?NUM_MULTI_TESTS, Multi).
+ try
+ throw(disabled),
+ Transitions = generate_transitions(),
+ Single = lists:flatmap(fun(T) ->
+ Name = lists:flatten(io_lib:format("single ~s", [tname(T)])),
+ [{Name, fun() -> check_single_transition(Db, T) end}]
+ end, lists:sort(Transitions)),
+ Multi = lists:flatmap(fun(T) ->
+ Name = lists:flatten(io_lib:format("multi ~s", [tname(T)])),
+ [{Name, fun() -> check_multi_transition(Db, T) end}]
+ end, lists:sort(group(shuffle(Transitions)))),
+ subset(?NUM_SINGLE_TESTS, Single) ++ subset(?NUM_MULTI_TESTS, Multi)
+ catch throw:disabled ->
+ [{"Disabled", fun() -> ok end}]
+ end.
check_single_transition(Db, {Set1, Set2, Transition}) ->
diff --git a/src/couch_views/test/couch_views_updater_test.erl b/src/couch_views/test/couch_views_updater_test.erl
index 89c341a..b90126a 100644
--- a/src/couch_views/test/couch_views_updater_test.erl
+++ b/src/couch_views/test/couch_views_updater_test.erl
@@ -135,7 +135,7 @@ includes_design_docs({Db, _}) ->
handle_erlfdb_errors({Db, _}) ->
- meck:expect(couch_views_fdb, write_doc, fun(_, _, _, _) ->
+ meck:expect(couch_views_fdb, write_doc, fun(_, _, _) ->
error({erlfdb_error, 1009})
end),
?assertError({erlfdb_error, 1009}, fabric2_db:update_docs(Db, [doc(4)])).
[couchdb] 03/03: Add test suite for reduce views
Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
davisp pushed a commit to branch prototype/fdb-layer-ebtree-views
in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit d2c2dab18e5daeaf3eaad212337b565e6a2b2636
Author: Paul J. Davis <pa...@gmail.com>
AuthorDate: Wed Aug 5 12:47:37 2020 -0500
Add test suite for reduce views
---
src/couch_views/test/couch_views_red_test.erl | 764 ++++++++++++++++++++++++++
1 file changed, 764 insertions(+)
diff --git a/src/couch_views/test/couch_views_red_test.erl b/src/couch_views/test/couch_views_red_test.erl
new file mode 100644
index 0000000..875e90b
--- /dev/null
+++ b/src/couch_views/test/couch_views_red_test.erl
@@ -0,0 +1,764 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_views_red_test).
+
+-include_lib("couch/include/couch_eunit.hrl").
+-include_lib("couch/include/couch_db.hrl").
+-include("couch_views.hrl").
+
+
+-define(TDEF(A), {atom_to_list(A), fun A/1}).
+-define(TDEFI(A), {atom_to_list(A), fun A/0}).
+
+
+with(Tests) ->
+ fun(ArgsTuple) ->
+ lists:map(fun({Name, Fun}) ->
+ {Name, ?_test(Fun(ArgsTuple))}
+ end, Tests)
+ end.
+
+
+-define(NUM_DOCS, 2000).
+
+
+reduce_views_shraed_db_test_() ->
+ {
+ "Reduce views",
+ {
+ setup,
+ fun setup_shared_db/0,
+ fun teardown_shared_db/1,
+ with([
+ ?TDEF(should_reduce),
+ ?TDEF(should_reduce_rev),
+ ?TDEF(should_reduce_start_key),
+ ?TDEF(should_reduce_start_key_rev),
+ ?TDEF(should_reduce_end_key),
+ ?TDEF(should_reduce_end_key_rev),
+ ?TDEF(should_reduce_inclusive_end_false),
+ ?TDEF(should_reduce_inclusive_end_false_rev),
+ ?TDEF(should_reduce_start_and_end_key),
+ ?TDEF(should_reduce_start_and_end_key_rev),
+ ?TDEF(should_reduce_empty_range),
+ ?TDEF(should_reduce_empty_range_rev),
+ ?TDEF(should_reduce_grouped),
+ ?TDEF(should_reduce_grouped_rev),
+ ?TDEF(should_reduce_grouped_start_key),
+ ?TDEF(should_reduce_grouped_start_key_rev),
+ ?TDEF(should_reduce_grouped_end_key),
+ ?TDEF(should_reduce_grouped_end_key_rev),
+ ?TDEF(should_reduce_grouped_inclusive_end_false),
+ ?TDEF(should_reduce_grouped_inclusive_end_false_rev),
+ ?TDEF(should_reduce_grouped_start_and_end_key),
+ ?TDEF(should_reduce_grouped_start_and_end_key_rev),
+ ?TDEF(should_reduce_grouped_empty_range),
+ ?TDEF(should_reduce_grouped_empty_range_rev),
+
+ ?TDEF(should_reduce_array_keys),
+ ?TDEF(should_reduce_grouped_array_keys),
+ ?TDEF(should_reduce_group_1_array_keys),
+ ?TDEF(should_reduce_group_1_array_keys_start_key),
+ ?TDEF(should_reduce_group_1_array_keys_start_key_rev),
+ ?TDEF(should_reduce_group_1_array_keys_end_key),
+ ?TDEF(should_reduce_group_1_array_keys_end_key_rev),
+ ?TDEF(should_reduce_group_1_array_keys_inclusive_end_false),
+ ?TDEF(should_reduce_group_1_array_keys_inclusive_end_false_rev),
+ ?TDEF(should_reduce_group_1_array_keys_start_and_end_key),
+ ?TDEF(should_reduce_group_1_array_keys_start_and_end_key_rev),
+ ?TDEF(should_reduce_group_1_array_keys_sub_array_select),
+ ?TDEF(should_reduce_group_1_array_keys_sub_array_select_rev),
+ ?TDEF(should_reduce_group_1_array_keys_sub_array_inclusive_end),
+ ?TDEF(should_reduce_group_1_array_keys_empty_range),
+ ?TDEF(should_reduce_group_1_array_keys_empty_range_rev)
+ ])
+ }
+ }.
+
+
+reduce_views_individual_test_() ->
+ {
+ "Reduce views",
+ {
+ setup,
+ fun setup_individual/0,
+ fun teardown_individual/1,
+ [
+ ?TDEFI(should_collate_group_keys)
+ ]
+ }
+ }.
+
+
+setup_shared_db() ->
+ Ctx = test_util:start_couch([
+ fabric,
+ couch_jobs,
+ couch_js,
+ couch_views
+ ]),
+ {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]),
+ fabric2_db:update_docs(Db, [create_ddoc()]),
+ make_docs(Db, ?NUM_DOCS),
+ run_query(Db, <<"baz">>, #{limit => 0}),
+ {Db, Ctx}.
+
+
+teardown_shared_db({Db, Ctx}) ->
+ fabric2_db:delete(fabric2_db:name(Db), [{user_ctx, ?ADMIN_USER}]),
+ test_util:stop_couch(Ctx).
+
+
+setup_individual() ->
+ test_util:start_couch([
+ fabric,
+ couch_jobs,
+ couch_js,
+ couch_views
+ ]).
+
+
+teardown_individual(Ctx) ->
+ test_util:stop_couch(Ctx).
+
+
+should_reduce({Db, _}) ->
+ Result = run_query(Db, <<"baz_count">>, #{}),
+ Expect = {ok, [row(null, ?NUM_DOCS)]},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_rev({Db, _}) ->
+ Args = #{
+ direction => rev
+ },
+ Result = run_query(Db, <<"baz_count">>, Args),
+ Expect = {ok, [row(null, ?NUM_DOCS)]},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_start_key({Db, _}) ->
+ Args = #{
+ start_key => 4
+ },
+ Result = run_query(Db, <<"baz_count">>, Args),
+ Expect = {ok, [row(null, ?NUM_DOCS - 3)]},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_start_key_rev({Db, _}) ->
+ Args = #{
+ direction => rev,
+ start_key => 4
+ },
+ Result = run_query(Db, <<"baz_count">>, Args),
+ Expect = {ok, [row(null, 4)]},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_end_key({Db, _}) ->
+ Args = #{
+ end_key => 6
+ },
+ Result = run_query(Db, <<"baz_count">>, Args),
+ Expect = {ok, [row(null, 6)]},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_end_key_rev({Db, _}) ->
+ Args = #{
+ direction => rev,
+ end_key => 6
+ },
+ Result = run_query(Db, <<"baz_count">>, Args),
+ Expect = {ok, [row(null, ?NUM_DOCS - 5)]},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_inclusive_end_false({Db, _}) ->
+ Args = #{
+ end_key => 6,
+ inclusive_end => false
+ },
+ Result = run_query(Db, <<"baz_count">>, Args),
+ Expect = {ok, [row(null, 5)]},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_inclusive_end_false_rev({Db, _}) ->
+ Args = #{
+ direction => rev,
+ end_key => 6,
+ inclusive_end => false
+ },
+ Result = run_query(Db, <<"baz_count">>, Args),
+ Expect = {ok, [row(null, ?NUM_DOCS - 6)]},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_start_and_end_key({Db, _}) ->
+ Args = #{
+ start_key => 3,
+ end_key => 5
+ },
+ Result = run_query(Db, <<"baz_count">>, Args),
+ Expect = {ok, [row(null, 3)]},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_start_and_end_key_rev({Db, _}) ->
+ Args = #{
+ direction => rev,
+ start_key => 5,
+ end_key => 3
+ },
+ Result = run_query(Db, <<"baz_count">>, Args),
+ Expect = {ok, [row(null, 3)]},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_empty_range({Db, _}) ->
+ Args = #{
+ start_key => 100000,
+ end_key => 100001
+ },
+ Result = run_query(Db, <<"baz_count">>, Args),
+ Expect = {ok, [row(null, 0)]},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_empty_range_rev({Db, _}) ->
+ Args = #{
+ direction => rev,
+ start_key => 100001,
+ end_key => 100000
+ },
+ Result = run_query(Db, <<"baz_count">>, Args),
+ Expect = {ok, [row(null, 0)]},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_grouped({Db, _}) ->
+ Args = #{
+ group_level => exact
+ },
+ Result = run_query(Db, <<"baz_count">>, Args),
+ Expect = {ok, [
+ row(I, 1) || I <- lists:seq(1, ?NUM_DOCS)
+ ]},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_grouped_rev({Db, _}) ->
+ Args = #{
+ direction => rev,
+ group_level => exact
+ },
+ Result = run_query(Db, <<"baz_count">>, Args),
+ Expect = {ok, [
+ row(I, 1) || I <- lists:seq(?NUM_DOCS, 1, -1)
+ ]},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_grouped_start_key({Db, _}) ->
+ Args = #{
+ group_level => exact,
+ start_key => 3
+ },
+ Result = run_query(Db, <<"baz_count">>, Args),
+ Expect = {ok, [
+ row(I, 1) || I <- lists:seq(3, ?NUM_DOCS)
+ ]},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_grouped_start_key_rev({Db, _}) ->
+ Args = #{
+ direction => rev,
+ group_level => exact,
+ start_key => 3
+ },
+ Result = run_query(Db, <<"baz_count">>, Args),
+ Expect = {ok, [
+ row(3, 1),
+ row(2, 1),
+ row(1, 1)
+ ]},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_grouped_end_key({Db, _}) ->
+ Args = #{
+ group_level => exact,
+ end_key => 6
+ },
+ Result = run_query(Db, <<"baz_count">>, Args),
+ Expect = {ok, [
+ row(I, 1) || I <- lists:seq(1, 6)
+ ]},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_grouped_end_key_rev({Db, _}) ->
+ Args = #{
+ direction => rev,
+ group_level => exact,
+ end_key => 6
+ },
+ Result = run_query(Db, <<"baz_count">>, Args),
+ Expect = {ok, [
+ row(I, 1) || I <- lists:seq(?NUM_DOCS, 6, -1)
+ ]},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_grouped_inclusive_end_false({Db, _}) ->
+ Args = #{
+ group_level => exact,
+ end_key => 4,
+ inclusive_end => false
+ },
+ Result = run_query(Db, <<"baz_count">>, Args),
+ Expect = {ok, [
+ row(I, 1) || I <- lists:seq(1, 3)
+ ]},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_grouped_inclusive_end_false_rev({Db, _}) ->
+ Args = #{
+ direction => rev,
+ group_level => exact,
+ end_key => 4,
+ inclusive_end => false
+ },
+ Result = run_query(Db, <<"baz_count">>, Args),
+ Expect = {ok, [
+ row(I, 1) || I <- lists:seq(?NUM_DOCS, 5, -1)
+ ]},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_grouped_start_and_end_key({Db, _}) ->
+ Args = #{
+ group_level => exact,
+ start_key => 2,
+ end_key => 4
+ },
+ Result = run_query(Db, <<"baz_count">>, Args),
+ Expect = {ok, [
+ row(I, 1) || I <- lists:seq(2, 4)
+ ]},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_grouped_start_and_end_key_rev({Db, _}) ->
+ Args = #{
+ direction => rev,
+ group_level => exact,
+ start_key => 4,
+ end_key => 2
+ },
+ Result = run_query(Db, <<"baz_count">>, Args),
+ Expect = {ok, [
+ row(I, 1) || I <- lists:seq(4, 2, -1)
+ ]},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_grouped_empty_range({Db, _}) ->
+ Args = #{
+ group_level => exact,
+ start_key => 100000,
+ end_key => 100001
+ },
+ Result = run_query(Db, <<"baz_count">>, Args),
+ Expect = {ok, []},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_grouped_empty_range_rev({Db, _}) ->
+ Args = #{
+ direction => rev,
+ group_level => exact,
+ start_key => 100001,
+ end_key => 100000
+ },
+ Result = run_query(Db, <<"baz_count">>, Args),
+ Expect = {ok, []},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_array_keys({Db, _}) ->
+ Result = run_query(Db, <<"boom">>, #{}),
+ Expect = {ok, [row(null, 1.5 * ?NUM_DOCS)]},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_grouped_array_keys({Db, _}) ->
+ Args = #{
+ group_level => exact
+ },
+ Result = run_query(Db, <<"boom">>, Args),
+ Expect = {ok, lists:sort([
+ row([I rem 3, I], 1.5) || I <- lists:seq(1, ?NUM_DOCS)
+ ])},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_group_1_array_keys({Db, _}) ->
+ Args = #{
+ group_level => 1
+ },
+ Result = run_query(Db, <<"boom">>, Args),
+ Expect = {ok, [
+ row([0], rem_count(0, ?NUM_DOCS) * 1.5),
+ row([1], rem_count(1, ?NUM_DOCS) * 1.5),
+ row([2], rem_count(2, ?NUM_DOCS) * 1.5)
+ ]},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_group_1_array_keys_start_key({Db, _}) ->
+ Args = #{
+ group_level => 1,
+ start_key => [1]
+ },
+ Result = run_query(Db, <<"boom">>, Args),
+ Expect = {ok, [
+ row([1], rem_count(1, ?NUM_DOCS) * 1.5),
+ row([2], rem_count(2, ?NUM_DOCS) * 1.5)
+ ]},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_group_1_array_keys_start_key_rev({Db, _}) ->
+ Args = #{
+ direction => rev,
+ group_level => 1,
+ start_key => [1, ?NUM_DOCS + 1]
+ },
+ Result = run_query(Db, <<"boom">>, Args),
+ Expect = {ok, [
+ row([1], rem_count(1, ?NUM_DOCS) * 1.5),
+ row([0], rem_count(0, ?NUM_DOCS) * 1.5)
+ ]},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_group_1_array_keys_end_key({Db, _}) ->
+ Args = #{
+ group_level => 1,
+ end_key => [1, ?NUM_DOCS + 1]
+ },
+ Result = run_query(Db, <<"boom">>, Args),
+ Expect = {ok, [
+ row([0], rem_count(0, ?NUM_DOCS) * 1.5),
+ row([1], rem_count(1, ?NUM_DOCS) * 1.5)
+ ]},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_group_1_array_keys_end_key_rev({Db, _}) ->
+ Args = #{
+ direction => rev,
+ group_level => 1,
+ end_key => [1]
+ },
+ Result = run_query(Db, <<"boom">>, Args),
+ Expect = {ok, [
+ row([2], rem_count(2, ?NUM_DOCS) * 1.5),
+ row([1], rem_count(1, ?NUM_DOCS) * 1.5)
+ ]},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_group_1_array_keys_inclusive_end_false({Db, _}) ->
+ Args = #{
+ group_level => 1,
+ end_key => [1],
+ inclusive_end => false
+ },
+ Result = run_query(Db, <<"boom">>, Args),
+ Expect = {ok, [
+ row([0], rem_count(0, ?NUM_DOCS) * 1.5)
+ ]},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_group_1_array_keys_inclusive_end_false_rev({Db, _}) ->
+ Args = #{
+ direction => rev,
+ group_level => 1,
+ end_key => [1, ?NUM_DOCS + 1],
+ inclusive_end => false
+ },
+ Result = run_query(Db, <<"boom">>, Args),
+ Expect = {ok, [
+ row([2], rem_count(2, ?NUM_DOCS) * 1.5)
+ ]},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_group_1_array_keys_start_and_end_key({Db, _}) ->
+ Args = #{
+ group_level => 1,
+ start_key => [1],
+ end_key => [1, ?NUM_DOCS + 1]
+ },
+ Result = run_query(Db, <<"boom">>, Args),
+ Expect = {ok, [
+ row([1], rem_count(1, ?NUM_DOCS) * 1.5)
+ ]},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_group_1_array_keys_start_and_end_key_rev({Db, _}) ->
+ Args = #{
+ direction => rev,
+ group_level => 1,
+ start_key => [1, ?NUM_DOCS + 1],
+ end_key => [1]
+ },
+ Result = run_query(Db, <<"boom">>, Args),
+ Expect = {ok, [
+ row([1], rem_count(1, ?NUM_DOCS) * 1.5)
+ ]},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_group_1_array_keys_sub_array_select({Db, _}) ->
+ % Test that keys are applied below the key grouping
+ Args = #{
+ group_level => 1,
+ start_key => [0, ?NUM_DOCS - 6],
+ end_key => [1, 4]
+ },
+ Result = run_query(Db, <<"boom">>, Args),
+ Expect = {ok, [
+ row([0], 3.0),
+ row([1], 3.0)
+ ]},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_group_1_array_keys_sub_array_select_rev({Db, _}) ->
+ % Test that keys are applied below the key grouping
+ Args = #{
+ direction => rev,
+ group_level => 1,
+ start_key => [1, 4],
+ end_key => [0, ?NUM_DOCS - 6]
+ },
+ Result = run_query(Db, <<"boom">>, Args),
+ Expect = {ok, [
+ row([1], 3.0),
+ row([0], 3.0)
+ ]},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_group_1_array_keys_sub_array_inclusive_end({Db, _}) ->
+ % Test that keys are applied below the key grouping
+ Args = #{
+ group_level => 1,
+ start_key => [0, ?NUM_DOCS - 6],
+ end_key => [1, 4],
+ inclusive_end => false
+ },
+ Result = run_query(Db, <<"boom">>, Args),
+ Expect = {ok, [
+ row([0], 3.0),
+ row([1], 1.5)
+ ]},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_group_1_array_keys_empty_range({Db, _}) ->
+ Args = #{
+ group_level => 1,
+ start_key => [100],
+ end_key => [101]
+ },
+ Result = run_query(Db, <<"boom">>, Args),
+ Expect = {ok, []},
+ ?assertEqual(Expect, Result).
+
+
+should_reduce_group_1_array_keys_empty_range_rev({Db, _}) ->
+ Args = #{
+ direction => rev,
+ group_level => 1,
+ start_key => [101],
+ end_key => [100]
+ },
+ Result = run_query(Db, <<"boom">>, Args),
+ Expect = {ok, []},
+ ?assertEqual(Expect, Result).
+
+
+should_collate_group_keys() ->
+ {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]),
+ DDoc = couch_doc:from_json_obj({[
+ {<<"_id">>, <<"_design/bar">>},
+ {<<"views">>, {[
+ {<<"group">>, {[
+ {<<"map">>, <<"function(doc) {emit([doc.val], 1);}">>},
+ {<<"reduce">>, <<"_count">>}
+ ]}}
+ ]}}
+ ]}),
+ % val is "föö" without combining characters
+ Doc1 = couch_doc:from_json_obj({[
+ {<<"_id">>, <<"a">>},
+ {<<"val">>, <<16#66, 16#C3, 16#B6, 16#C3, 16#B6>>}
+ ]}),
+ % val is "föö" without combining characters
+ Doc2 = couch_doc:from_json_obj({[
+ {<<"_id">>, <<"b">>},
+ {<<"val">>, <<16#66, 16#6F, 16#CC, 16#88, 16#6F, 16#CC, 16#88>>}
+ ]}),
+ {ok, _} = fabric2_db:update_docs(Db, [DDoc, Doc1, Doc2]),
+
+ % An implementation detail we have is that depending on
+ % the direction of the view read we'll get the first
+ % or last key to represent a group. In this particular
+ % implementation the document ID breaks the sort tie
+ % in the map view data.
+
+ ArgsFwd = #{
+ group_level => exact
+ },
+ ResultFwd = run_query(Db, DDoc, <<"group">>, ArgsFwd),
+ ExpectFwd = {ok, [
+ row([<<16#66, 16#C3, 16#B6, 16#C3, 16#B6>>], 2)
+ ]},
+ ?assertEqual(ExpectFwd, ResultFwd),
+
+ ArgsRev = #{
+ direction => rev,
+ group_level => exact
+ },
+ ResultRev = run_query(Db, DDoc, <<"group">>, ArgsRev),
+ ExpectRev = {ok, [
+ row([<<16#66, 16#6F, 16#CC, 16#88, 16#6F, 16#CC, 16#88>>], 2)
+ ]},
+ ?assertEqual(ExpectRev, ResultRev).
+
+
+rem_count(Rem, Count) ->
+ Members = [I || I <- lists:seq(1, Count), I rem 3 == Rem],
+ length(Members).
+
+
+run_query(Db, Idx, Args) ->
+ DDoc = create_ddoc(),
+ run_query(Db, DDoc, Idx, Args).
+
+
+run_query(Db, DDoc, Idx, Args) ->
+ couch_views:query(Db, DDoc, Idx, fun default_cb/2, [], Args).
+
+
+default_cb(complete, Acc) ->
+ {ok, lists:reverse(Acc)};
+default_cb({final, Info}, []) ->
+ {ok, [Info]};
+default_cb({final, _}, Acc) ->
+ {ok, Acc};
+default_cb({meta, _}, Acc) ->
+ {ok, Acc};
+default_cb(ok, ddoc_updated) ->
+ {ok, ddoc_updated};
+default_cb(Row, Acc) ->
+ {ok, [Row | Acc]}.
+
+
+row(Key, Value) ->
+ {row, [{key, Key}, {value, Value}]}.
+
+
+create_ddoc() ->
+ couch_doc:from_json_obj({[
+ {<<"_id">>, <<"_design/bar">>},
+ {<<"views">>, {[
+ {<<"baz">>, {[
+ {<<"map">>, <<"function(doc) {emit(doc.val, doc.val);}">>}
+ ]}},
+ {<<"baz_count">>, {[
+ {<<"map">>, <<"function(doc) {emit(doc.val, doc.val);}">>},
+ {<<"reduce">>, <<"_count">>}
+ ]}},
+ {<<"baz_size">>, {[
+ {<<"map">>, <<"function(doc) {emit(doc.val, doc.val);}">>},
+ {<<"reduce">>, <<"_sum">>}
+ ]}},
+ {<<"boom">>, {[
+ {<<"map">>, <<
+ "function(doc) {\n"
+ " emit([doc.val % 3, doc.val], 1.5);\n"
+ "}"
+ >>},
+ {<<"reduce">>, <<"_sum">>}
+ ]}},
+ {<<"bing">>, {[
+ {<<"map">>, <<"function(doc) {}">>},
+ {<<"reduce">>, <<"_count">>}
+ ]}},
+ {<<"bing_hyper">>, {[
+ {<<"map">>, <<"function(doc) {}">>},
+ {<<"reduce">>, <<"_approx_count_distinct">>}
+ ]}},
+ {<<"doc_emit">>, {[
+ {<<"map">>, <<"function(doc) {emit(doc.val, doc)}">>}
+ ]}},
+ {<<"duplicate_keys">>, {[
+ {<<"map">>, <<
+ "function(doc) {\n"
+ " emit(doc._id, doc.val);\n"
+ " emit(doc._id, doc.val + 1);\n"
+ "}">>},
+ {<<"reduce">>, <<"_count">>}
+ ]}},
+ {<<"zing">>, {[
+ {<<"map">>, <<
+ "function(doc) {\n"
+ " if(doc.foo !== undefined)\n"
+ " emit(doc.foo, 0);\n"
+ "}"
+ >>}
+ ]}}
+ ]}}
+ ]}).
+
+
+make_docs(Db, TotalDocs) when TotalDocs > 0 ->
+ make_docs(Db, TotalDocs, 0).
+
+
+make_docs(Db, TotalDocs, DocsMade) when TotalDocs > DocsMade ->
+ DocCount = min(TotalDocs - DocsMade, 500),
+ Docs = [doc(I + DocsMade) || I <- lists:seq(1, DocCount)],
+ fabric2_db:update_docs(Db, Docs),
+ make_docs(Db, TotalDocs, DocsMade + DocCount);
+
+make_docs(_Db, TotalDocs, DocsMade) when TotalDocs =< DocsMade ->
+ ok.
+
+
+doc(Id) ->
+ couch_doc:from_json_obj({[
+ {<<"_id">>, list_to_binary(integer_to_list(Id))},
+ {<<"val">>, Id}
+ ]}).
[couchdb] 02/03: Use ebtree for reduce functions
Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
davisp pushed a commit to branch prototype/fdb-layer-ebtree-views
in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit dd3d0cae5c01f64862e22236ed7cb4ee00220ea5
Author: Paul J. Davis <pa...@gmail.com>
AuthorDate: Wed Jul 29 10:34:48 2020 -0500
Use ebtree for reduce functions
---
src/couch_views/src/couch_views.erl | 10 --
src/couch_views/src/couch_views_fdb.erl | 142 ++++++++++++++++++++++----
src/couch_views/src/couch_views_reader.erl | 153 ++++++++++++++++++++++++++++-
3 files changed, 271 insertions(+), 34 deletions(-)
diff --git a/src/couch_views/src/couch_views.erl b/src/couch_views/src/couch_views.erl
index f6e163a..275a049 100644
--- a/src/couch_views/src/couch_views.erl
+++ b/src/couch_views/src/couch_views.erl
@@ -49,10 +49,6 @@ query(Db, DDoc, ViewName, Callback, Acc0, Args0) ->
Args2 = couch_mrview_util:set_view_type(Args1, ViewName, Views),
Args3 = couch_mrview_util:validate_args(Args2),
ok = check_range(Args3),
- case is_reduce_view(Args3) of
- true -> throw(not_implemented);
- false -> ok
- end,
try
fabric2_fdb:transactional(Db, fun(TxDb) ->
@@ -165,12 +161,6 @@ maybe_update_view(TxDb, Mrst, false, _Args) ->
end.
-is_reduce_view(#mrargs{view_type = ViewType}) ->
- ViewType =:= red;
-is_reduce_view({Reduce, _, _}) ->
- Reduce =:= red.
-
-
to_mrargs(#mrargs{} = Args) ->
Args;
diff --git a/src/couch_views/src/couch_views_fdb.erl b/src/couch_views/src/couch_views_fdb.erl
index 9fe9a89..07728ab 100644
--- a/src/couch_views/src/couch_views_fdb.erl
+++ b/src/couch_views/src/couch_views_fdb.erl
@@ -28,6 +28,7 @@
get_kv_size/2,
fold_map_idx/5,
+ fold_red_idx/6,
write_doc/3,
@@ -144,7 +145,7 @@ get_row_count(TxDb, View) ->
#{
tx := Tx
} = TxDb,
- {Count, _} = ebtree:full_reduce(Tx, View#mrview.btree),
+ {Count, _, _} = ebtree:full_reduce(Tx, View#mrview.btree),
Count.
@@ -152,7 +153,7 @@ get_kv_size(TxDb, View) ->
#{
tx := Tx
} = TxDb,
- {_, TotalSize} = ebtree:full_reduce(Tx, View#mrview.btree),
+ {_, TotalSize, _} = ebtree:full_reduce(Tx, View#mrview.btree),
TotalSize.
@@ -216,6 +217,74 @@ fold_map_idx(TxDb, View, Options, Callback, Acc0) ->
end.
+fold_red_idx(TxDb, View, Idx, Options, Callback, Acc0) ->
+ #{
+ tx := Tx
+ } = TxDb,
+ #mrview{
+ btree = Btree
+ } = View,
+
+ {Dir, StartKey, EndKey, InclusiveEnd, GroupKeyFun} = to_red_opts(Options),
+
+ Wrapper = fun({GroupKey, Reduction}, WAcc) ->
+ {_RowCount, _RowSize, UserReds} = Reduction,
+ RedValue = lists:nth(Idx, UserReds),
+ Callback(GroupKey, RedValue, WAcc)
+ end,
+
+ case {GroupKeyFun, Dir} of
+ {group_all, fwd} ->
+ EBtreeOpts = [
+ {dir, fwd},
+ {inclusive_end, InclusiveEnd}
+ ],
+ Reduction = ebtree:reduce(Tx, Btree, StartKey, EndKey, EBtreeOpts),
+ Wrapper({null, Reduction}, Acc0);
+ {F, fwd} when is_function(F) ->
+ EBtreeOpts = [
+ {dir, fwd},
+ {inclusive_end, InclusiveEnd}
+ ],
+ ebtree:group_reduce(
+ Tx,
+ Btree,
+ StartKey,
+ EndKey,
+ GroupKeyFun,
+ Wrapper,
+ Acc0,
+ EBtreeOpts
+ );
+ {group_all, rev} ->
+ % Start/End keys swapped on purpose because ebtree. Also
+ % inclusive_start for same reason.
+ EBtreeOpts = [
+ {dir, rev},
+ {inclusive_start, InclusiveEnd}
+ ],
+ Reduction = ebtree:reduce(Tx, Btree, EndKey, StartKey, EBtreeOpts),
+ Wrapper({null, Reduction}, Acc0);
+ {F, rev} when is_function(F) ->
+ % Start/End keys swapped on purpose because ebtree. Also
+ % inclusive_start for same reason.
+ EBtreeOpts = [
+ {dir, rev},
+ {inclusive_start, InclusiveEnd}
+ ],
+ ebtree:group_reduce(
+ Tx,
+ Btree,
+ EndKey,
+ StartKey,
+ GroupKeyFun,
+ Wrapper,
+ Acc0,
+ EBtreeOpts
+ )
+ end.
+
+
write_doc(TxDb, Mrst, #{deleted := true} = Doc) ->
#{
tx := Tx
@@ -336,7 +405,7 @@ open_id_tree(TxDb, Sig) ->
ebtree:open(Tx, Prefix, get_order(id_btree), []).
-open_view_tree(TxDb, Sig, _Lang, View) ->
+open_view_tree(TxDb, Sig, Lang, View) ->
#{
tx := Tx,
db_prefix := DbPrefix
@@ -347,7 +416,7 @@ open_view_tree(TxDb, Sig, _Lang, View) ->
Prefix = view_tree_prefix(DbPrefix, Sig, ViewId),
TreeOpts = [
{collate_fun, couch_views_util:collate_fun(View)},
- {reduce_fun, make_reduce_fun(View)}
+ {reduce_fun, make_reduce_fun(Lang, View)}
],
View#mrview{
btree = ebtree:open(Tx, Prefix, get_order(view_btree), TreeOpts)
@@ -360,26 +429,30 @@ get_order(view_btree) ->
config:get_integer("couch_views", "view_btree_node_size", 100).
-make_reduce_fun(#mrview{}) ->
+make_reduce_fun(Lang, #mrview{} = View) ->
+ RedFuns = [Src || {_, Src} <- View#mrview.reduce_funs],
fun
- (KVs, _ReReduce = false) ->
- TotalSize = lists:foldl(fun({K, V}, Acc) ->
+ (KVs0, _ReReduce = false) ->
+ KVs1 = detuple_kvs(expand_dupes(KVs0)),
+ TotalSize = lists:foldl(fun([K, V], Acc) ->
KSize = couch_ejson_size:encoded_size(K),
- VSize = case V of
- {dups, Dups} ->
- lists:foldl(fun(D, DAcc) ->
- DAcc + couch_ejson_size:encoded_size(D)
- end, 0, Dups);
- _ ->
- couch_ejson_size:encoded_size(V)
- end,
+ VSize = couch_ejson_size:encoded_size(V),
KSize + VSize + Acc
- end, 0, KVs),
- {length(KVs), TotalSize};
- (KRs, _ReReduce = true) ->
- lists:foldl(fun({Count, Size}, {CountAcc, SizeAcc}) ->
- {Count + CountAcc, Size + SizeAcc}
- end, {0, 0}, KRs)
+ end, 0, KVs1),
+ {ok, UserReds} = couch_query_servers:reduce(Lang, RedFuns, KVs1),
+ {length(KVs1), TotalSize, UserReds};
+ (Reductions, _ReReduce = true) ->
+ FoldFun = fun({Count, Size, UserReds}, {CAcc, SAcc, URedAcc}) ->
+ NewCAcc = Count + CAcc,
+ NewSAcc = Size + SAcc,
+ NewURedAcc = [UserReds | URedAcc],
+ {NewCAcc, NewSAcc, NewURedAcc}
+ end,
+ InitAcc = {0, 0, []},
+ FinalAcc = lists:foldl(FoldFun, InitAcc, Reductions),
+ {FinalCount, FinalSize, UReds} = FinalAcc,
+ {ok, Result} = couch_query_servers:rereduce(Lang, RedFuns, UReds),
+ {FinalCount, FinalSize, Result}
end.
@@ -409,6 +482,17 @@ to_map_opts(Options) ->
{Dir, StartKey, EndKey, InclusiveEnd}.
+to_red_opts(Options) ->
+ {Dir, StartKey, EndKey, InclusiveEnd} = to_map_opts(Options),
+
+ GroupKeyFun = case lists:keyfind(group_key_fun, 1, Options) of
+ {group_key_fun, GKF} -> GKF;
+ false -> fun({_Key, _DocId}) -> global_group end
+ end,
+
+ {Dir, StartKey, EndKey, InclusiveEnd, GroupKeyFun}.
+
+
dedupe_rows(View, KVs0) ->
CollateFun = couch_views_util:collate_fun(View),
KVs1 = lists:sort(fun({KeyA, ValA}, {KeyB, ValB}) ->
@@ -446,6 +530,22 @@ combine_vals(V1, V2) ->
{dups, [V1, V2]}.
+expand_dupes([]) ->
+ [];
+expand_dupes([{K, {dups, Dups}} | Rest]) ->
+ Expanded = [{K, D} || D <- Dups],
+ Expanded ++ expand_dupes(Rest);
+expand_dupes([{K, V} | Rest]) ->
+ [{K, V} | expand_dupes(Rest)].
+
+
+detuple_kvs([]) ->
+ [];
+detuple_kvs([KV | Rest]) ->
+ {{Key, Id}, Value} = KV,
+ [[[Key, Id], Value] | detuple_kvs(Rest)].
+
+
id_tree_prefix(DbPrefix, Sig) ->
Key = {?DB_VIEWS, ?VIEW_TREES, Sig, ?VIEW_ID_TREE},
erlfdb_tuple:pack(Key, DbPrefix).
diff --git a/src/couch_views/src/couch_views_reader.erl b/src/couch_views/src/couch_views_reader.erl
index 6342f61..c39598a 100644
--- a/src/couch_views/src/couch_views_reader.erl
+++ b/src/couch_views/src/couch_views_reader.erl
@@ -23,7 +23,15 @@
-include_lib("fabric/include/fabric2.hrl").
-read(Db, Mrst0, ViewName, UserCallback, UserAcc0, Args) ->
+read(Db, Mrst, ViewName, UserCallback, UserAcc, Args) ->
+ ReadFun = case Args of
+ #mrargs{view_type = map} -> fun read_map_view/6;
+ #mrargs{view_type = red} -> fun read_red_view/6
+ end,
+ ReadFun(Db, Mrst, ViewName, UserCallback, UserAcc, Args).
+
+
+read_map_view(Db, Mrst0, ViewName, UserCallback, UserAcc0, Args) ->
try
fabric2_fdb:transactional(Db, fun(TxDb) ->
#mrst{
@@ -68,6 +76,73 @@ read(Db, Mrst0, ViewName, UserCallback, UserAcc0, Args) ->
end.
+read_red_view(Db, Mrst0, ViewName, UserCallback, UserAcc0, Args) ->
+ try
+ fabric2_fdb:transactional(Db, fun(TxDb) ->
+ #mrst{
+ language = Lang,
+ views = Views
+ } = Mrst = couch_views_fdb:set_trees(TxDb, Mrst0),
+
+ #mrargs{
+ extra = Extra
+ } = Args,
+
+ {Idx, Lang, View} = get_red_view(Lang, Args, ViewName, Views),
+ Fun = fun handle_red_row/3,
+
+ Meta = get_red_meta(TxDb, Mrst, View, Args),
+ UserAcc1 = maybe_stop(UserCallback(Meta, UserAcc0)),
+
+ Finalizer = case couch_util:get_value(finalizer, Extra) of
+ undefined ->
+ {_, FunSrc} = lists:nth(Idx, View#mrview.reduce_funs),
+ FunSrc;
+ CustomFun->
+ CustomFun
+ end,
+
+ Acc0 = #{
+ db => TxDb,
+ skip => Args#mrargs.skip,
+ limit => Args#mrargs.limit,
+ mrargs => undefined,
+ finalizer => Finalizer,
+ red_idx => Idx,
+ language => Lang,
+ callback => UserCallback,
+ acc => UserAcc1
+ },
+
+ Acc1 = lists:foldl(fun(KeyArgs, KeyAcc0) ->
+ Opts = mrargs_to_fdb_options(KeyArgs),
+ KeyAcc1 = KeyAcc0#{
+ mrargs := KeyArgs
+ },
+ couch_views_fdb:fold_red_idx(
+ TxDb,
+ View,
+ Idx,
+ Opts,
+ Fun,
+ KeyAcc1
+ )
+ end, Acc0, expand_keys_args(Args)),
+
+ #{
+ acc := UserAcc2
+ } = Acc1,
+ {ok, maybe_stop(UserCallback(complete, UserAcc2))}
+ end)
+ catch
+ throw:{complete, Out} ->
+ {_, Final} = UserCallback(complete, Out),
+ {ok, Final};
+ throw:{done, Out} ->
+ {ok, Out}
+ end.
+
+
get_map_meta(TxDb, Mrst, View, #mrargs{update_seq = true}) ->
TotalRows = couch_views_fdb:get_row_count(TxDb, View),
ViewSeq = couch_views_fdb:get_update_seq(TxDb, Mrst),
@@ -78,6 +153,14 @@ get_map_meta(TxDb, _Mrst, View, #mrargs{}) ->
{meta, [{total, TotalRows}, {offset, null}]}.
+get_red_meta(TxDb, Mrst, _View, #mrargs{update_seq = true}) ->
+ ViewSeq = couch_views_fdb:get_update_seq(TxDb, Mrst),
+ {meta, [{update_seq, ViewSeq}]};
+
+get_red_meta(_TxDb, _Mrst, _View, #mrargs{}) ->
+ {meta, []}.
+
+
handle_map_row(_DocId, _Key, _Value, #{skip := Skip} = Acc) when Skip > 0 ->
Acc#{skip := Skip - 1};
@@ -115,6 +198,38 @@ handle_map_row(DocId, Key, Value, Acc) ->
Acc#{limit := Limit - 1, acc := UserAcc1}.
+handle_red_row(_Key, _Red, #{skip := Skip} = Acc) when Skip > 0 ->
+ Acc#{skip := Skip - 1};
+
+handle_red_row(_Key, _Value, #{limit := 0, acc := UserAcc}) ->
+ throw({complete, UserAcc});
+
+handle_red_row(Key0, Value0, Acc) ->
+ #{
+ limit := Limit,
+ finalizer := Finalizer,
+ callback := UserCallback,
+ acc := UserAcc0
+ } = Acc,
+
+ Key1 = case Key0 of
+ undefined -> null;
+ _ -> Key0
+ end,
+ Value1 = maybe_finalize(Finalizer, Value0),
+ Row = [{key, Key1}, {value, Value1}],
+
+ UserAcc1 = maybe_stop(UserCallback({row, Row}, UserAcc0)),
+ Acc#{limit := Limit - 1, acc := UserAcc1}.
+
+
+maybe_finalize(null, Red) ->
+ Red;
+maybe_finalize(Finalizer, Red) ->
+ {ok, Finalized} = couch_query_servers:finalize(Finalizer, Red),
+ Finalized.
+
+
get_map_view(Lang, Args, ViewName, Views) ->
case couch_mrview_util:extract_view(Lang, Args, ViewName, Views) of
{map, View, _Args} -> View;
@@ -122,6 +237,13 @@ get_map_view(Lang, Args, ViewName, Views) ->
end.
+get_red_view(Lang, Args, ViewName, Views) ->
+ case couch_mrview_util:extract_view(Lang, Args, ViewName, Views) of
+ {red, {Idx, Lang, View}, _} -> {Idx, Lang, View};
+ _ -> throw({not_found, missing_named_view})
+ end.
+
+
expand_keys_args(#mrargs{keys = undefined} = Args) ->
[Args];
@@ -136,12 +258,14 @@ expand_keys_args(#mrargs{keys = Keys} = Args) ->
mrargs_to_fdb_options(Args) ->
#mrargs{
+ view_type = ViewType,
start_key = StartKey,
start_key_docid = StartKeyDocId,
end_key = EndKey,
end_key_docid = EndKeyDocId0,
direction = Direction,
- inclusive_end = InclusiveEnd
+ inclusive_end = InclusiveEnd,
+ group_level = GroupLevel
} = Args,
StartKeyOpts = if StartKey == undefined -> []; true ->
@@ -160,10 +284,33 @@ mrargs_to_fdb_options(Args) ->
[{end_key, {EndKey, EndKeyDocId}}]
end,
+ GroupFunOpt = make_group_key_fun(ViewType, GroupLevel),
+
[
{dir, Direction},
{inclusive_end, InclusiveEnd}
- ] ++ StartKeyOpts ++ EndKeyOpts.
+ ] ++ StartKeyOpts ++ EndKeyOpts ++ GroupFunOpt.
+
+
+make_group_key_fun(map, _) ->
+ [];
+
+make_group_key_fun(red, exact) ->
+ [
+ {group_key_fun, fun({Key, _DocId}) -> Key end}
+ ];
+
+make_group_key_fun(red, 0) ->
+ [
+ {group_key_fun, group_all}
+ ];
+
+make_group_key_fun(red, N) when is_integer(N), N > 0 ->
+ GKFun = fun
+ ({Key, _DocId}) when is_list(Key) -> lists:sublist(Key, N);
+ ({Key, _DocId}) -> Key
+ end,
+ [{group_key_fun, GKFun}].
maybe_stop({ok, Acc}) -> Acc;