You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@couchdb.apache.org by GitBox <gi...@apache.org> on 2020/08/13 16:04:24 UTC

[GitHub] [couchdb] davisp commented on a change in pull request #3073: Prototype/fdb layer ebtree views

davisp commented on a change in pull request #3073:
URL: https://github.com/apache/couchdb/pull/3073#discussion_r470061117



##########
File path: src/couch_views/src/couch_views_fdb.erl
##########
@@ -244,200 +373,223 @@ clear_index(Db, Signature) ->
     end, Keys),
 
     % Clear index data
-    RangeTuple = {?DB_VIEWS, ?VIEW_DATA, Signature},
-    RangePrefix = erlfdb_tuple:pack(RangeTuple, DbPrefix),
-    erlfdb:clear_range_startswith(Tx, RangePrefix).
-
-
-% For each row in a map view we store the the key/value
-% in FoundationDB:
-%
-%   `(EncodedSortKey, (EncodedKey, EncodedValue))`
-%
-% The difference between `EncodedSortKey` and `EndcodedKey` is
-% the use of `couch_util:get_sort_key/1` which turns UTF-8
-% strings into binaries that are byte comparable. Given a sort
-% key binary we cannot recover the input so to return unmodified
-% user data we are forced to store the original.
-
-fold_fwd({RowKey, PackedKeyValue}, Acc) ->
-    #{
-        prefix := Prefix,
-        callback := UserCallback,
-        acc := UserAcc0
-    } = Acc,
-
-    {{_SortKey, DocId}, _DupeId} =
-            erlfdb_tuple:unpack(RowKey, Prefix),
+    DataTuple = {?DB_VIEWS, ?VIEW_DATA, Signature},
+    DataPrefix = erlfdb_tuple:pack(DataTuple, DbPrefix),
+    erlfdb:clear_range_startswith(Tx, DataPrefix),
 
-    {EncodedOriginalKey, EncodedValue} = erlfdb_tuple:unpack(PackedKeyValue),
-    Value = couch_views_encoding:decode(EncodedValue),
-    Key = couch_views_encoding:decode(EncodedOriginalKey),
+    % Clear tree data
+    TreeTuple = {?DB_VIEWS, ?VIEW_TREES, Signature},
+    TreePrefix = erlfdb_tuple:pack(TreeTuple, DbPrefix),
+    erlfdb:clear_range_startswith(Tx, TreePrefix).
 
-    UserAcc1 = UserCallback(DocId, Key, Value, UserAcc0),
 
-    Acc#{
-        acc := UserAcc1
-    }.
-
-
-clear_id_idx(TxDb, Sig, DocId) ->
+get_view_keys(TxDb, Mrst, DocId) ->
     #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-
-    {Start, End} = id_idx_range(DbPrefix, Sig, DocId),
-    ok = erlfdb:clear_range(Tx, Start, End).
-
-
-clear_map_idx(TxDb, Sig, ViewId, DocId, ViewKeys) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-
-    lists:foreach(fun(ViewKey) ->
-        {Start, End} = map_idx_range(DbPrefix, Sig, ViewId, ViewKey, DocId),
-        ok = erlfdb:clear_range(Tx, Start, End)
-    end, ViewKeys).
-
-
-update_id_idx(TxDb, Sig, ViewId, DocId, [], _KVSize) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-    Key = id_idx_key(DbPrefix, Sig, DocId, ViewId),
-    ok = erlfdb:clear(Tx, Key);
-
-update_id_idx(TxDb, Sig, ViewId, DocId, NewRows, KVSize) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-
-    Unique = lists:usort([K || {K, _V} <- NewRows]),
-
-    Key = id_idx_key(DbPrefix, Sig, DocId, ViewId),
-    Val = couch_views_encoding:encode([length(NewRows), KVSize, Unique]),
-    ok = erlfdb:set(Tx, Key, aegis:encrypt(TxDb, Key, Val)).
-
-
-update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
+        tx := Tx
     } = TxDb,
-
-    lists:foreach(fun(RemKey) ->
-        {Start, End} = map_idx_range(DbPrefix, Sig, ViewId, RemKey, DocId),
-        ok = erlfdb:clear_range(Tx, Start, End)
-    end, ExistingKeys),
-
-    KVsToAdd = process_rows(NewRows),
-    MapIdxPrefix = map_idx_prefix(DbPrefix, Sig, ViewId),
-
-    lists:foreach(fun({DupeId, Key1, Key2, EV}) ->
-        KK = map_idx_key(MapIdxPrefix, {Key1, DocId}, DupeId),
-        Val = erlfdb_tuple:pack({Key2, EV}),
-        ok = erlfdb:set(Tx, KK, aegis:encrypt(TxDb, KK, Val))
-    end, KVsToAdd).
+    #mrst{
+        id_btree = IdTree
+    } = Mrst,
+    case ebtree:lookup(Tx, IdTree, DocId) of
+        {DocId, ViewKeys} -> ViewKeys;
+        false -> []
+    end.
 
 
-get_view_keys(TxDb, Sig, DocId) ->
+open_id_tree(TxDb, Sig) ->
     #{
         tx := Tx,
         db_prefix := DbPrefix
     } = TxDb,
-    {Start, End} = id_idx_range(DbPrefix, Sig, DocId),
-    lists:map(fun({K, V}) ->
-        {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_ID_RANGE, DocId, ViewId} =
-                erlfdb_tuple:unpack(K, DbPrefix),
-        [TotalKeys, TotalSize, UniqueKeys] = couch_views_encoding:decode(V),
-        {ViewId, TotalKeys, TotalSize, UniqueKeys}
-    end, aegis:decrypt(TxDb, erlfdb:get_range(Tx, Start, End, []))).
+    Prefix = id_tree_prefix(DbPrefix, Sig),
+    TreeOpts = [
+        {persist_fun, fun persist_chunks/3}
+    ],
+    ebtree:open(Tx, Prefix, get_order(id_btree), TreeOpts).
 
 
-update_row_count(TxDb, Sig, ViewId, Increment) ->
+open_view_tree(TxDb, Sig, Lang, View) ->
     #{
         tx := Tx,
         db_prefix := DbPrefix
     } = TxDb,
-    Key = row_count_key(DbPrefix, Sig, ViewId),
-    erlfdb:add(Tx, Key, Increment).
+    #mrview{
+        id_num = ViewId
+    } = View,
+    Prefix = view_tree_prefix(DbPrefix, Sig, ViewId),
+    TreeOpts = [
+        {collate_fun, couch_views_util:collate_fun(View)},
+        {reduce_fun, make_reduce_fun(Lang, View)},
+        {persist_fun, fun persist_chunks/3}
+    ],
+    View#mrview{
+        btree = ebtree:open(Tx, Prefix, get_order(view_btree), TreeOpts)
+    }.
 
 
-update_kv_size(TxDb, Sig, ViewId, Increment) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
+get_order(id_btree) ->
+    min_order(config:get_integer("couch_views", "id_btree_node_size", 100));
+get_order(view_btree) ->
+    min_order(config:get_integer("couch_views", "view_btree_node_size", 100)).
+
+
+min_order(V) when is_integer(V), V < 2 ->
+    2;
+min_order(V) when is_integer(V), V rem 2 == 0 ->
+    V;
+min_order(V) ->
+    V + 1.
+
+
+make_reduce_fun(Lang, #mrview{} = View) ->
+    RedFuns = [Src || {_, Src} <- View#mrview.reduce_funs],
+    fun
+        (KVs0, _ReReduce = false) ->
+            KVs1 = detuple_kvs(expand_dupes(KVs0)),
+            TotalSize = lists:foldl(fun([K, V], Acc) ->
+                KSize = couch_ejson_size:encoded_size(K),
+                VSize = couch_ejson_size:encoded_size(V),
+                KSize + VSize + Acc
+            end, 0, KVs1),
+            {ok, UserReds} = couch_query_servers:reduce(Lang, RedFuns, KVs1),
+            {length(KVs1), TotalSize, UserReds};
+        (Reductions, _ReReduce = true) ->
+            FoldFun = fun({Count, Size, UserReds}, {CAcc, SAcc, URedAcc}) ->
+                NewCAcc = Count + CAcc,
+                NewSAcc = Size + SAcc,
+                NewURedAcc = [UserReds | URedAcc],
+                {NewCAcc, NewSAcc, NewURedAcc}
+            end,
+            InitAcc = {0, 0, []},
+            FinalAcc = lists:foldl(FoldFun, InitAcc, Reductions),
+            {FinalCount, FinalSize, UReds} = FinalAcc,
+            {ok, Result} = couch_query_servers:rereduce(Lang, RedFuns, UReds),
+            {FinalCount, FinalSize, Result}
+    end.
 
-    % Track a view specific size for calls to
-    % GET /dbname/_design/doc/_info`
-    IdxKey = kv_size_key(DbPrefix, Sig, ViewId),
-    erlfdb:add(Tx, IdxKey, Increment),
 
-    % Track a database level rollup for calls to
-    % GET /dbname
-    DbKey = db_kv_size_key(DbPrefix),
-    erlfdb:add(Tx, DbKey, Increment).
+persist_chunks(Tx, set, [Key, Value]) ->
+    Chunks = fabric2_fdb:chunkify_binary(Value),
+    lists:foldl(fun(Chunk, Id) ->
+        ChunkKey = erlfdb_tuple:pack({Id}, Key),
+        erlfdb:set(Tx, ChunkKey, Chunk),

Review comment:
       That should be part of the encode function. Need to add that as well.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org