You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@couchdb.apache.org by GitBox <gi...@apache.org> on 2020/08/12 18:40:01 UTC

[GitHub] [couchdb] davisp opened a new pull request #3073: Prototype/fdb layer ebtree views

davisp opened a new pull request #3073:
URL: https://github.com/apache/couchdb/pull/3073


   ## Overview
   
   Full support for reduce functions using ebtree for all map/reduce data.
   
   ## Testing recommendations
   
   `make check`
   
   ## Related Issues or Pull Requests
   
   This is an alternative approach to:
   
   #2984 
   #3018
   
   ## Checklist
   
   - [x] Code is written and works correctly
   - [x] Changes are covered by tests
   - [x] Any new configurable parameters are documented in `rel/overlay/etc/default.ini`
   - [ ] A PR for documentation changes has been made in https://github.com/apache/couchdb-documentation
   


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [couchdb] davisp closed pull request #3073: Prototype/fdb layer ebtree views

Posted by GitBox <gi...@apache.org>.
davisp closed pull request #3073:
URL: https://github.com/apache/couchdb/pull/3073


   


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [couchdb] rnewson commented on a change in pull request #3073: Prototype/fdb layer ebtree views

Posted by GitBox <gi...@apache.org>.
rnewson commented on a change in pull request #3073:
URL: https://github.com/apache/couchdb/pull/3073#discussion_r469579555



##########
File path: src/couch_views/src/couch_views_fdb.erl
##########
@@ -244,200 +373,223 @@ clear_index(Db, Signature) ->
     end, Keys),
 
     % Clear index data
-    RangeTuple = {?DB_VIEWS, ?VIEW_DATA, Signature},
-    RangePrefix = erlfdb_tuple:pack(RangeTuple, DbPrefix),
-    erlfdb:clear_range_startswith(Tx, RangePrefix).
-
-
-% For each row in a map view we store the the key/value
-% in FoundationDB:
-%
-%   `(EncodedSortKey, (EncodedKey, EncodedValue))`
-%
-% The difference between `EncodedSortKey` and `EndcodedKey` is
-% the use of `couch_util:get_sort_key/1` which turns UTF-8
-% strings into binaries that are byte comparable. Given a sort
-% key binary we cannot recover the input so to return unmodified
-% user data we are forced to store the original.
-
-fold_fwd({RowKey, PackedKeyValue}, Acc) ->
-    #{
-        prefix := Prefix,
-        callback := UserCallback,
-        acc := UserAcc0
-    } = Acc,
-
-    {{_SortKey, DocId}, _DupeId} =
-            erlfdb_tuple:unpack(RowKey, Prefix),
+    DataTuple = {?DB_VIEWS, ?VIEW_DATA, Signature},
+    DataPrefix = erlfdb_tuple:pack(DataTuple, DbPrefix),
+    erlfdb:clear_range_startswith(Tx, DataPrefix),
 
-    {EncodedOriginalKey, EncodedValue} = erlfdb_tuple:unpack(PackedKeyValue),
-    Value = couch_views_encoding:decode(EncodedValue),
-    Key = couch_views_encoding:decode(EncodedOriginalKey),
+    % Clear tree data
+    TreeTuple = {?DB_VIEWS, ?VIEW_TREES, Signature},
+    TreePrefix = erlfdb_tuple:pack(TreeTuple, DbPrefix),
+    erlfdb:clear_range_startswith(Tx, TreePrefix).
 
-    UserAcc1 = UserCallback(DocId, Key, Value, UserAcc0),
 
-    Acc#{
-        acc := UserAcc1
-    }.
-
-
-clear_id_idx(TxDb, Sig, DocId) ->
+get_view_keys(TxDb, Mrst, DocId) ->
     #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-
-    {Start, End} = id_idx_range(DbPrefix, Sig, DocId),
-    ok = erlfdb:clear_range(Tx, Start, End).
-
-
-clear_map_idx(TxDb, Sig, ViewId, DocId, ViewKeys) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-
-    lists:foreach(fun(ViewKey) ->
-        {Start, End} = map_idx_range(DbPrefix, Sig, ViewId, ViewKey, DocId),
-        ok = erlfdb:clear_range(Tx, Start, End)
-    end, ViewKeys).
-
-
-update_id_idx(TxDb, Sig, ViewId, DocId, [], _KVSize) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-    Key = id_idx_key(DbPrefix, Sig, DocId, ViewId),
-    ok = erlfdb:clear(Tx, Key);
-
-update_id_idx(TxDb, Sig, ViewId, DocId, NewRows, KVSize) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-
-    Unique = lists:usort([K || {K, _V} <- NewRows]),
-
-    Key = id_idx_key(DbPrefix, Sig, DocId, ViewId),
-    Val = couch_views_encoding:encode([length(NewRows), KVSize, Unique]),
-    ok = erlfdb:set(Tx, Key, aegis:encrypt(TxDb, Key, Val)).
-
-
-update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
+        tx := Tx
     } = TxDb,
-
-    lists:foreach(fun(RemKey) ->
-        {Start, End} = map_idx_range(DbPrefix, Sig, ViewId, RemKey, DocId),
-        ok = erlfdb:clear_range(Tx, Start, End)
-    end, ExistingKeys),
-
-    KVsToAdd = process_rows(NewRows),
-    MapIdxPrefix = map_idx_prefix(DbPrefix, Sig, ViewId),
-
-    lists:foreach(fun({DupeId, Key1, Key2, EV}) ->
-        KK = map_idx_key(MapIdxPrefix, {Key1, DocId}, DupeId),
-        Val = erlfdb_tuple:pack({Key2, EV}),
-        ok = erlfdb:set(Tx, KK, aegis:encrypt(TxDb, KK, Val))
-    end, KVsToAdd).
+    #mrst{
+        id_btree = IdTree
+    } = Mrst,
+    case ebtree:lookup(Tx, IdTree, DocId) of
+        {DocId, ViewKeys} -> ViewKeys;
+        false -> []
+    end.
 
 
-get_view_keys(TxDb, Sig, DocId) ->
+open_id_tree(TxDb, Sig) ->
     #{
         tx := Tx,
         db_prefix := DbPrefix
     } = TxDb,
-    {Start, End} = id_idx_range(DbPrefix, Sig, DocId),
-    lists:map(fun({K, V}) ->
-        {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_ID_RANGE, DocId, ViewId} =
-                erlfdb_tuple:unpack(K, DbPrefix),
-        [TotalKeys, TotalSize, UniqueKeys] = couch_views_encoding:decode(V),
-        {ViewId, TotalKeys, TotalSize, UniqueKeys}
-    end, aegis:decrypt(TxDb, erlfdb:get_range(Tx, Start, End, []))).
+    Prefix = id_tree_prefix(DbPrefix, Sig),
+    TreeOpts = [
+        {persist_fun, fun persist_chunks/3}
+    ],
+    ebtree:open(Tx, Prefix, get_order(id_btree), TreeOpts).
 
 
-update_row_count(TxDb, Sig, ViewId, Increment) ->
+open_view_tree(TxDb, Sig, Lang, View) ->
     #{
         tx := Tx,
         db_prefix := DbPrefix
     } = TxDb,
-    Key = row_count_key(DbPrefix, Sig, ViewId),
-    erlfdb:add(Tx, Key, Increment).
+    #mrview{
+        id_num = ViewId
+    } = View,
+    Prefix = view_tree_prefix(DbPrefix, Sig, ViewId),
+    TreeOpts = [
+        {collate_fun, couch_views_util:collate_fun(View)},
+        {reduce_fun, make_reduce_fun(Lang, View)},
+        {persist_fun, fun persist_chunks/3}
+    ],
+    View#mrview{
+        btree = ebtree:open(Tx, Prefix, get_order(view_btree), TreeOpts)
+    }.
 
 
-update_kv_size(TxDb, Sig, ViewId, Increment) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
+get_order(id_btree) ->
+    min_order(config:get_integer("couch_views", "id_btree_node_size", 100));
+get_order(view_btree) ->
+    min_order(config:get_integer("couch_views", "view_btree_node_size", 100)).
+
+
+min_order(V) when is_integer(V), V < 2 ->
+    2;
+min_order(V) when is_integer(V), V rem 2 == 0 ->
+    V;
+min_order(V) ->
+    V + 1.
+
+
+make_reduce_fun(Lang, #mrview{} = View) ->
+    RedFuns = [Src || {_, Src} <- View#mrview.reduce_funs],
+    fun
+        (KVs0, _ReReduce = false) ->
+            KVs1 = detuple_kvs(expand_dupes(KVs0)),
+            TotalSize = lists:foldl(fun([K, V], Acc) ->
+                KSize = couch_ejson_size:encoded_size(K),
+                VSize = couch_ejson_size:encoded_size(V),
+                KSize + VSize + Acc
+            end, 0, KVs1),
+            {ok, UserReds} = couch_query_servers:reduce(Lang, RedFuns, KVs1),
+            {length(KVs1), TotalSize, UserReds};
+        (Reductions, _ReReduce = true) ->
+            FoldFun = fun({Count, Size, UserReds}, {CAcc, SAcc, URedAcc}) ->
+                NewCAcc = Count + CAcc,
+                NewSAcc = Size + SAcc,
+                NewURedAcc = [UserReds | URedAcc],
+                {NewCAcc, NewSAcc, NewURedAcc}
+            end,
+            InitAcc = {0, 0, []},
+            FinalAcc = lists:foldl(FoldFun, InitAcc, Reductions),
+            {FinalCount, FinalSize, UReds} = FinalAcc,
+            {ok, Result} = couch_query_servers:rereduce(Lang, RedFuns, UReds),
+            {FinalCount, FinalSize, Result}
+    end.
 
-    % Track a view specific size for calls to
-    % GET /dbname/_design/doc/_info`
-    IdxKey = kv_size_key(DbPrefix, Sig, ViewId),
-    erlfdb:add(Tx, IdxKey, Increment),
 
-    % Track a database level rollup for calls to
-    % GET /dbname
-    DbKey = db_kv_size_key(DbPrefix),
-    erlfdb:add(Tx, DbKey, Increment).
+persist_chunks(Tx, set, [Key, Value]) ->
+    Chunks = fabric2_fdb:chunkify_binary(Value),
+    lists:foldl(fun(Chunk, Id) ->
+        ChunkKey = erlfdb_tuple:pack({Id}, Key),
+        erlfdb:set(Tx, ChunkKey, Chunk),

Review comment:
       The chunk needs to go through `aegis:encrypt` here before it is written.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [couchdb] rnewson commented on a change in pull request #3073: Prototype/fdb layer ebtree views

Posted by GitBox <gi...@apache.org>.
rnewson commented on a change in pull request #3073:
URL: https://github.com/apache/couchdb/pull/3073#discussion_r469577223



##########
File path: src/couch_views/src/couch_views_fdb.erl
##########
@@ -244,200 +373,223 @@ clear_index(Db, Signature) ->
     end, Keys),
 
     % Clear index data
-    RangeTuple = {?DB_VIEWS, ?VIEW_DATA, Signature},
-    RangePrefix = erlfdb_tuple:pack(RangeTuple, DbPrefix),
-    erlfdb:clear_range_startswith(Tx, RangePrefix).
-
-
-% For each row in a map view we store the the key/value
-% in FoundationDB:
-%
-%   `(EncodedSortKey, (EncodedKey, EncodedValue))`
-%
-% The difference between `EncodedSortKey` and `EndcodedKey` is
-% the use of `couch_util:get_sort_key/1` which turns UTF-8
-% strings into binaries that are byte comparable. Given a sort
-% key binary we cannot recover the input so to return unmodified
-% user data we are forced to store the original.
-
-fold_fwd({RowKey, PackedKeyValue}, Acc) ->
-    #{
-        prefix := Prefix,
-        callback := UserCallback,
-        acc := UserAcc0
-    } = Acc,
-
-    {{_SortKey, DocId}, _DupeId} =
-            erlfdb_tuple:unpack(RowKey, Prefix),
+    DataTuple = {?DB_VIEWS, ?VIEW_DATA, Signature},
+    DataPrefix = erlfdb_tuple:pack(DataTuple, DbPrefix),
+    erlfdb:clear_range_startswith(Tx, DataPrefix),
 
-    {EncodedOriginalKey, EncodedValue} = erlfdb_tuple:unpack(PackedKeyValue),
-    Value = couch_views_encoding:decode(EncodedValue),
-    Key = couch_views_encoding:decode(EncodedOriginalKey),
+    % Clear tree data
+    TreeTuple = {?DB_VIEWS, ?VIEW_TREES, Signature},
+    TreePrefix = erlfdb_tuple:pack(TreeTuple, DbPrefix),
+    erlfdb:clear_range_startswith(Tx, TreePrefix).
 
-    UserAcc1 = UserCallback(DocId, Key, Value, UserAcc0),
 
-    Acc#{
-        acc := UserAcc1
-    }.
-
-
-clear_id_idx(TxDb, Sig, DocId) ->
+get_view_keys(TxDb, Mrst, DocId) ->
     #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-
-    {Start, End} = id_idx_range(DbPrefix, Sig, DocId),
-    ok = erlfdb:clear_range(Tx, Start, End).
-
-
-clear_map_idx(TxDb, Sig, ViewId, DocId, ViewKeys) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-
-    lists:foreach(fun(ViewKey) ->
-        {Start, End} = map_idx_range(DbPrefix, Sig, ViewId, ViewKey, DocId),
-        ok = erlfdb:clear_range(Tx, Start, End)
-    end, ViewKeys).
-
-
-update_id_idx(TxDb, Sig, ViewId, DocId, [], _KVSize) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-    Key = id_idx_key(DbPrefix, Sig, DocId, ViewId),
-    ok = erlfdb:clear(Tx, Key);
-
-update_id_idx(TxDb, Sig, ViewId, DocId, NewRows, KVSize) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-
-    Unique = lists:usort([K || {K, _V} <- NewRows]),
-
-    Key = id_idx_key(DbPrefix, Sig, DocId, ViewId),
-    Val = couch_views_encoding:encode([length(NewRows), KVSize, Unique]),
-    ok = erlfdb:set(Tx, Key, aegis:encrypt(TxDb, Key, Val)).
-
-
-update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
+        tx := Tx
     } = TxDb,
-
-    lists:foreach(fun(RemKey) ->
-        {Start, End} = map_idx_range(DbPrefix, Sig, ViewId, RemKey, DocId),
-        ok = erlfdb:clear_range(Tx, Start, End)
-    end, ExistingKeys),
-
-    KVsToAdd = process_rows(NewRows),
-    MapIdxPrefix = map_idx_prefix(DbPrefix, Sig, ViewId),
-
-    lists:foreach(fun({DupeId, Key1, Key2, EV}) ->
-        KK = map_idx_key(MapIdxPrefix, {Key1, DocId}, DupeId),
-        Val = erlfdb_tuple:pack({Key2, EV}),
-        ok = erlfdb:set(Tx, KK, aegis:encrypt(TxDb, KK, Val))
-    end, KVsToAdd).
+    #mrst{
+        id_btree = IdTree
+    } = Mrst,
+    case ebtree:lookup(Tx, IdTree, DocId) of
+        {DocId, ViewKeys} -> ViewKeys;
+        false -> []
+    end.
 
 
-get_view_keys(TxDb, Sig, DocId) ->
+open_id_tree(TxDb, Sig) ->
     #{
         tx := Tx,
         db_prefix := DbPrefix
     } = TxDb,
-    {Start, End} = id_idx_range(DbPrefix, Sig, DocId),
-    lists:map(fun({K, V}) ->
-        {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_ID_RANGE, DocId, ViewId} =
-                erlfdb_tuple:unpack(K, DbPrefix),
-        [TotalKeys, TotalSize, UniqueKeys] = couch_views_encoding:decode(V),
-        {ViewId, TotalKeys, TotalSize, UniqueKeys}
-    end, aegis:decrypt(TxDb, erlfdb:get_range(Tx, Start, End, []))).
+    Prefix = id_tree_prefix(DbPrefix, Sig),
+    TreeOpts = [
+        {persist_fun, fun persist_chunks/3}
+    ],
+    ebtree:open(Tx, Prefix, get_order(id_btree), TreeOpts).
 
 
-update_row_count(TxDb, Sig, ViewId, Increment) ->
+open_view_tree(TxDb, Sig, Lang, View) ->
     #{
         tx := Tx,
         db_prefix := DbPrefix
     } = TxDb,
-    Key = row_count_key(DbPrefix, Sig, ViewId),
-    erlfdb:add(Tx, Key, Increment).
+    #mrview{
+        id_num = ViewId
+    } = View,
+    Prefix = view_tree_prefix(DbPrefix, Sig, ViewId),
+    TreeOpts = [
+        {collate_fun, couch_views_util:collate_fun(View)},
+        {reduce_fun, make_reduce_fun(Lang, View)},
+        {persist_fun, fun persist_chunks/3}
+    ],
+    View#mrview{
+        btree = ebtree:open(Tx, Prefix, get_order(view_btree), TreeOpts)
+    }.
 
 
-update_kv_size(TxDb, Sig, ViewId, Increment) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
+get_order(id_btree) ->
+    min_order(config:get_integer("couch_views", "id_btree_node_size", 100));
+get_order(view_btree) ->
+    min_order(config:get_integer("couch_views", "view_btree_node_size", 100)).
+
+
+min_order(V) when is_integer(V), V < 2 ->
+    2;
+min_order(V) when is_integer(V), V rem 2 == 0 ->
+    V;
+min_order(V) ->

Review comment:
       Add the guard to this clause as well: `when is_integer(V) ->`




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [couchdb] davisp commented on pull request #3073: Prototype/fdb layer ebtree views

Posted by GitBox <gi...@apache.org>.
davisp commented on pull request #3073:
URL: https://github.com/apache/couchdb/pull/3073#issuecomment-695085080


   Closing in favor of #3164 


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [couchdb] garrensmith commented on a change in pull request #3073: Prototype/fdb layer ebtree views

Posted by GitBox <gi...@apache.org>.
garrensmith commented on a change in pull request #3073:
URL: https://github.com/apache/couchdb/pull/3073#discussion_r471298832



##########
File path: src/mango/src/mango_idx_view.erl
##########
@@ -34,6 +34,7 @@
 -include_lib("couch/include/couch_db.hrl").
 -include("mango.hrl").
 -include("mango_idx.hrl").
+-include("mango_idx_view.hrl").

Review comment:
       I've only had a very quick look at this PR, but I'm not sure why the mango indexes are affected. From our discussion, we agreed that map-only indexes wouldn't use ebtree?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [couchdb] davisp commented on a change in pull request #3073: Prototype/fdb layer ebtree views

Posted by GitBox <gi...@apache.org>.
davisp commented on a change in pull request #3073:
URL: https://github.com/apache/couchdb/pull/3073#discussion_r470061117



##########
File path: src/couch_views/src/couch_views_fdb.erl
##########
@@ -244,200 +373,223 @@ clear_index(Db, Signature) ->
     end, Keys),
 
     % Clear index data
-    RangeTuple = {?DB_VIEWS, ?VIEW_DATA, Signature},
-    RangePrefix = erlfdb_tuple:pack(RangeTuple, DbPrefix),
-    erlfdb:clear_range_startswith(Tx, RangePrefix).
-
-
-% For each row in a map view we store the the key/value
-% in FoundationDB:
-%
-%   `(EncodedSortKey, (EncodedKey, EncodedValue))`
-%
-% The difference between `EncodedSortKey` and `EndcodedKey` is
-% the use of `couch_util:get_sort_key/1` which turns UTF-8
-% strings into binaries that are byte comparable. Given a sort
-% key binary we cannot recover the input so to return unmodified
-% user data we are forced to store the original.
-
-fold_fwd({RowKey, PackedKeyValue}, Acc) ->
-    #{
-        prefix := Prefix,
-        callback := UserCallback,
-        acc := UserAcc0
-    } = Acc,
-
-    {{_SortKey, DocId}, _DupeId} =
-            erlfdb_tuple:unpack(RowKey, Prefix),
+    DataTuple = {?DB_VIEWS, ?VIEW_DATA, Signature},
+    DataPrefix = erlfdb_tuple:pack(DataTuple, DbPrefix),
+    erlfdb:clear_range_startswith(Tx, DataPrefix),
 
-    {EncodedOriginalKey, EncodedValue} = erlfdb_tuple:unpack(PackedKeyValue),
-    Value = couch_views_encoding:decode(EncodedValue),
-    Key = couch_views_encoding:decode(EncodedOriginalKey),
+    % Clear tree data
+    TreeTuple = {?DB_VIEWS, ?VIEW_TREES, Signature},
+    TreePrefix = erlfdb_tuple:pack(TreeTuple, DbPrefix),
+    erlfdb:clear_range_startswith(Tx, TreePrefix).
 
-    UserAcc1 = UserCallback(DocId, Key, Value, UserAcc0),
 
-    Acc#{
-        acc := UserAcc1
-    }.
-
-
-clear_id_idx(TxDb, Sig, DocId) ->
+get_view_keys(TxDb, Mrst, DocId) ->
     #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-
-    {Start, End} = id_idx_range(DbPrefix, Sig, DocId),
-    ok = erlfdb:clear_range(Tx, Start, End).
-
-
-clear_map_idx(TxDb, Sig, ViewId, DocId, ViewKeys) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-
-    lists:foreach(fun(ViewKey) ->
-        {Start, End} = map_idx_range(DbPrefix, Sig, ViewId, ViewKey, DocId),
-        ok = erlfdb:clear_range(Tx, Start, End)
-    end, ViewKeys).
-
-
-update_id_idx(TxDb, Sig, ViewId, DocId, [], _KVSize) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-    Key = id_idx_key(DbPrefix, Sig, DocId, ViewId),
-    ok = erlfdb:clear(Tx, Key);
-
-update_id_idx(TxDb, Sig, ViewId, DocId, NewRows, KVSize) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-
-    Unique = lists:usort([K || {K, _V} <- NewRows]),
-
-    Key = id_idx_key(DbPrefix, Sig, DocId, ViewId),
-    Val = couch_views_encoding:encode([length(NewRows), KVSize, Unique]),
-    ok = erlfdb:set(Tx, Key, aegis:encrypt(TxDb, Key, Val)).
-
-
-update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
+        tx := Tx
     } = TxDb,
-
-    lists:foreach(fun(RemKey) ->
-        {Start, End} = map_idx_range(DbPrefix, Sig, ViewId, RemKey, DocId),
-        ok = erlfdb:clear_range(Tx, Start, End)
-    end, ExistingKeys),
-
-    KVsToAdd = process_rows(NewRows),
-    MapIdxPrefix = map_idx_prefix(DbPrefix, Sig, ViewId),
-
-    lists:foreach(fun({DupeId, Key1, Key2, EV}) ->
-        KK = map_idx_key(MapIdxPrefix, {Key1, DocId}, DupeId),
-        Val = erlfdb_tuple:pack({Key2, EV}),
-        ok = erlfdb:set(Tx, KK, aegis:encrypt(TxDb, KK, Val))
-    end, KVsToAdd).
+    #mrst{
+        id_btree = IdTree
+    } = Mrst,
+    case ebtree:lookup(Tx, IdTree, DocId) of
+        {DocId, ViewKeys} -> ViewKeys;
+        false -> []
+    end.
 
 
-get_view_keys(TxDb, Sig, DocId) ->
+open_id_tree(TxDb, Sig) ->
     #{
         tx := Tx,
         db_prefix := DbPrefix
     } = TxDb,
-    {Start, End} = id_idx_range(DbPrefix, Sig, DocId),
-    lists:map(fun({K, V}) ->
-        {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_ID_RANGE, DocId, ViewId} =
-                erlfdb_tuple:unpack(K, DbPrefix),
-        [TotalKeys, TotalSize, UniqueKeys] = couch_views_encoding:decode(V),
-        {ViewId, TotalKeys, TotalSize, UniqueKeys}
-    end, aegis:decrypt(TxDb, erlfdb:get_range(Tx, Start, End, []))).
+    Prefix = id_tree_prefix(DbPrefix, Sig),
+    TreeOpts = [
+        {persist_fun, fun persist_chunks/3}
+    ],
+    ebtree:open(Tx, Prefix, get_order(id_btree), TreeOpts).
 
 
-update_row_count(TxDb, Sig, ViewId, Increment) ->
+open_view_tree(TxDb, Sig, Lang, View) ->
     #{
         tx := Tx,
         db_prefix := DbPrefix
     } = TxDb,
-    Key = row_count_key(DbPrefix, Sig, ViewId),
-    erlfdb:add(Tx, Key, Increment).
+    #mrview{
+        id_num = ViewId
+    } = View,
+    Prefix = view_tree_prefix(DbPrefix, Sig, ViewId),
+    TreeOpts = [
+        {collate_fun, couch_views_util:collate_fun(View)},
+        {reduce_fun, make_reduce_fun(Lang, View)},
+        {persist_fun, fun persist_chunks/3}
+    ],
+    View#mrview{
+        btree = ebtree:open(Tx, Prefix, get_order(view_btree), TreeOpts)
+    }.
 
 
-update_kv_size(TxDb, Sig, ViewId, Increment) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
+get_order(id_btree) ->
+    min_order(config:get_integer("couch_views", "id_btree_node_size", 100));
+get_order(view_btree) ->
+    min_order(config:get_integer("couch_views", "view_btree_node_size", 100)).
+
+
+min_order(V) when is_integer(V), V < 2 ->
+    2;
+min_order(V) when is_integer(V), V rem 2 == 0 ->
+    V;
+min_order(V) ->
+    V + 1.
+
+
+make_reduce_fun(Lang, #mrview{} = View) ->
+    RedFuns = [Src || {_, Src} <- View#mrview.reduce_funs],
+    fun
+        (KVs0, _ReReduce = false) ->
+            KVs1 = detuple_kvs(expand_dupes(KVs0)),
+            TotalSize = lists:foldl(fun([K, V], Acc) ->
+                KSize = couch_ejson_size:encoded_size(K),
+                VSize = couch_ejson_size:encoded_size(V),
+                KSize + VSize + Acc
+            end, 0, KVs1),
+            {ok, UserReds} = couch_query_servers:reduce(Lang, RedFuns, KVs1),
+            {length(KVs1), TotalSize, UserReds};
+        (Reductions, _ReReduce = true) ->
+            FoldFun = fun({Count, Size, UserReds}, {CAcc, SAcc, URedAcc}) ->
+                NewCAcc = Count + CAcc,
+                NewSAcc = Size + SAcc,
+                NewURedAcc = [UserReds | URedAcc],
+                {NewCAcc, NewSAcc, NewURedAcc}
+            end,
+            InitAcc = {0, 0, []},
+            FinalAcc = lists:foldl(FoldFun, InitAcc, Reductions),
+            {FinalCount, FinalSize, UReds} = FinalAcc,
+            {ok, Result} = couch_query_servers:rereduce(Lang, RedFuns, UReds),
+            {FinalCount, FinalSize, Result}
+    end.
 
-    % Track a view specific size for calls to
-    % GET /dbname/_design/doc/_info`
-    IdxKey = kv_size_key(DbPrefix, Sig, ViewId),
-    erlfdb:add(Tx, IdxKey, Increment),
 
-    % Track a database level rollup for calls to
-    % GET /dbname
-    DbKey = db_kv_size_key(DbPrefix),
-    erlfdb:add(Tx, DbKey, Increment).
+persist_chunks(Tx, set, [Key, Value]) ->
+    Chunks = fabric2_fdb:chunkify_binary(Value),
+    lists:foldl(fun(Chunk, Id) ->
+        ChunkKey = erlfdb_tuple:pack({Id}, Key),
+        erlfdb:set(Tx, ChunkKey, Chunk),

Review comment:
       That should be part of the encode function. Need to add that as well.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [couchdb] garrensmith commented on a change in pull request #3073: Prototype/fdb layer ebtree views

Posted by GitBox <gi...@apache.org>.
garrensmith commented on a change in pull request #3073:
URL: https://github.com/apache/couchdb/pull/3073#discussion_r471300265



##########
File path: src/couch_views/src/couch_views_fdb.erl
##########
@@ -126,92 +129,218 @@ set_update_seq(TxDb, Sig, Seq) ->
     ok = erlfdb:set(Tx, seq_key(DbPrefix, Sig), Seq).
 
 
-get_row_count(TxDb, #mrst{sig = Sig}, ViewId) ->
+set_trees(TxDb, Mrst) ->
+    #mrst{
+        sig = Sig,
+        language = Lang,
+        views = Views
+    } = Mrst,
+    Mrst#mrst{
+        id_btree = open_id_tree(TxDb, Sig),

Review comment:
       Why do we need an id tree?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org