You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@couchdb.apache.org by GitBox <gi...@apache.org> on 2020/09/22 11:28:51 UTC

[GitHub] [couchdb] garrensmith commented on a change in pull request #3164: Feature ebtree views

garrensmith commented on a change in pull request #3164:
URL: https://github.com/apache/couchdb/pull/3164#discussion_r492561310



##########
File path: rel/overlay/etc/default.ini
##########
@@ -337,6 +337,12 @@ iterations = 10 ; iterations for password hashing
 ; The maximum allowed value size emitted from a view for a document (in bytes)
 ;value_size_limit = 64000
 ;
+; The order of B+Tree nodes used by the id btree

Review comment:
       We will have to explain what order means. I'm not 100% sure the best place. Maybe it should go in the docs 

##########
File path: src/couch_views/src/couch_views_fdb.erl
##########
@@ -244,200 +290,284 @@ clear_index(Db, Signature) ->
     end, Keys),
 
     % Clear index data
-    RangeTuple = {?DB_VIEWS, ?VIEW_DATA, Signature},
-    RangePrefix = erlfdb_tuple:pack(RangeTuple, DbPrefix),
-    erlfdb:clear_range_startswith(Tx, RangePrefix).
+    DataTuple = {?DB_VIEWS, ?VIEW_DATA, Signature},
+    DataPrefix = erlfdb_tuple:pack(DataTuple, DbPrefix),
+    erlfdb:clear_range_startswith(Tx, DataPrefix),
 
+    % Clear tree data
+    TreeTuple = {?DB_VIEWS, ?VIEW_TREES, Signature},
+    TreePrefix = erlfdb_tuple:pack(TreeTuple, DbPrefix),
+    erlfdb:clear_range_startswith(Tx, TreePrefix).
 
-% For each row in a map view we store the the key/value
-% in FoundationDB:
-%
-%   `(EncodedSortKey, (EncodedKey, EncodedValue))`
-%
-% The difference between `EncodedSortKey` and `EndcodedKey` is
-% the use of `couch_util:get_sort_key/1` which turns UTF-8
-% strings into binaries that are byte comparable. Given a sort
-% key binary we cannot recover the input so to return unmodified
-% user data we are forced to store the original.
 
-fold_fwd({RowKey, PackedKeyValue}, Acc) ->
-    #{
-        prefix := Prefix,
-        callback := UserCallback,
-        acc := UserAcc0
-    } = Acc,
-
-    {{_SortKey, DocId}, _DupeId} =
-            erlfdb_tuple:unpack(RowKey, Prefix),
-
-    {EncodedOriginalKey, EncodedValue} = erlfdb_tuple:unpack(PackedKeyValue),
-    Value = couch_views_encoding:decode(EncodedValue),
-    Key = couch_views_encoding:decode(EncodedOriginalKey),
-
-    UserAcc1 = UserCallback(DocId, Key, Value, UserAcc0),
-
-    Acc#{
-        acc := UserAcc1
-    }.
-
-
-clear_id_idx(TxDb, Sig, DocId) ->
+open_id_tree(TxDb, Sig) ->
     #{
         tx := Tx,
         db_prefix := DbPrefix
     } = TxDb,
-
-    {Start, End} = id_idx_range(DbPrefix, Sig, DocId),
-    ok = erlfdb:clear_range(Tx, Start, End).
-
-
-clear_map_idx(TxDb, Sig, ViewId, DocId, ViewKeys) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-
-    lists:foreach(fun(ViewKey) ->
-        {Start, End} = map_idx_range(DbPrefix, Sig, ViewId, ViewKey, DocId),
-        ok = erlfdb:clear_range(Tx, Start, End)
-    end, ViewKeys).
-
-
-update_id_idx(TxDb, Sig, ViewId, DocId, [], _KVSize) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-    Key = id_idx_key(DbPrefix, Sig, DocId, ViewId),
-    ok = erlfdb:clear(Tx, Key);
-
-update_id_idx(TxDb, Sig, ViewId, DocId, NewRows, KVSize) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-
-    Unique = lists:usort([K || {K, _V} <- NewRows]),
-
-    Key = id_idx_key(DbPrefix, Sig, DocId, ViewId),
-    Val = couch_views_encoding:encode([length(NewRows), KVSize, Unique]),
-    ok = erlfdb:set(Tx, Key, aegis:encrypt(TxDb, Key, Val)).
-
-
-update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-
-    lists:foreach(fun(RemKey) ->
-        {Start, End} = map_idx_range(DbPrefix, Sig, ViewId, RemKey, DocId),
-        ok = erlfdb:clear_range(Tx, Start, End)
-    end, ExistingKeys),
-
-    KVsToAdd = process_rows(NewRows),
-    MapIdxPrefix = map_idx_prefix(DbPrefix, Sig, ViewId),
-
-    lists:foreach(fun({DupeId, Key1, Key2, EV}) ->
-        KK = map_idx_key(MapIdxPrefix, {Key1, DocId}, DupeId),
-        Val = erlfdb_tuple:pack({Key2, EV}),
-        ok = erlfdb:set(Tx, KK, aegis:encrypt(TxDb, KK, Val))
-    end, KVsToAdd).
-
-
-get_view_keys(TxDb, Sig, DocId) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-    {Start, End} = id_idx_range(DbPrefix, Sig, DocId),
-    lists:map(fun({K, V}) ->
-        {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_ID_RANGE, DocId, ViewId} =
-                erlfdb_tuple:unpack(K, DbPrefix),
-        [TotalKeys, TotalSize, UniqueKeys] = couch_views_encoding:decode(V),
-        {ViewId, TotalKeys, TotalSize, UniqueKeys}
-    end, aegis:decrypt(TxDb, erlfdb:get_range(Tx, Start, End, []))).
+    Prefix = id_tree_prefix(DbPrefix, Sig),
+    TreeOpts = [
+        {persist_fun, fun persist_chunks/3},
+        {cache_fun, create_cache_fun(id_tree)}
+    ],
+    ebtree:open(Tx, Prefix, get_order(id_btree), TreeOpts).
 
 
-update_row_count(TxDb, Sig, ViewId, Increment) ->
+open_view_tree(TxDb, Sig, _Lang, View) ->

Review comment:
       I don't think any of the tree-related functions should be in this file. It muddies what this module is doing. 
   I think we need a module like `couch_views_tree` and then have all the related tree functions like open_id_tree, open_view_tree, make_reduce_fun etc in that.

##########
File path: src/couch_views/src/couch_views_fdb.erl
##########
@@ -244,200 +290,284 @@ clear_index(Db, Signature) ->
     end, Keys),
 
     % Clear index data
-    RangeTuple = {?DB_VIEWS, ?VIEW_DATA, Signature},
-    RangePrefix = erlfdb_tuple:pack(RangeTuple, DbPrefix),
-    erlfdb:clear_range_startswith(Tx, RangePrefix).
+    DataTuple = {?DB_VIEWS, ?VIEW_DATA, Signature},
+    DataPrefix = erlfdb_tuple:pack(DataTuple, DbPrefix),
+    erlfdb:clear_range_startswith(Tx, DataPrefix),
 
+    % Clear tree data
+    TreeTuple = {?DB_VIEWS, ?VIEW_TREES, Signature},
+    TreePrefix = erlfdb_tuple:pack(TreeTuple, DbPrefix),
+    erlfdb:clear_range_startswith(Tx, TreePrefix).
 
-% For each row in a map view we store the the key/value
-% in FoundationDB:
-%
-%   `(EncodedSortKey, (EncodedKey, EncodedValue))`
-%
-% The difference between `EncodedSortKey` and `EndcodedKey` is
-% the use of `couch_util:get_sort_key/1` which turns UTF-8
-% strings into binaries that are byte comparable. Given a sort
-% key binary we cannot recover the input so to return unmodified
-% user data we are forced to store the original.
 
-fold_fwd({RowKey, PackedKeyValue}, Acc) ->
-    #{
-        prefix := Prefix,
-        callback := UserCallback,
-        acc := UserAcc0
-    } = Acc,
-
-    {{_SortKey, DocId}, _DupeId} =
-            erlfdb_tuple:unpack(RowKey, Prefix),
-
-    {EncodedOriginalKey, EncodedValue} = erlfdb_tuple:unpack(PackedKeyValue),
-    Value = couch_views_encoding:decode(EncodedValue),
-    Key = couch_views_encoding:decode(EncodedOriginalKey),
-
-    UserAcc1 = UserCallback(DocId, Key, Value, UserAcc0),
-
-    Acc#{
-        acc := UserAcc1
-    }.
-
-
-clear_id_idx(TxDb, Sig, DocId) ->
+open_id_tree(TxDb, Sig) ->
     #{
         tx := Tx,
         db_prefix := DbPrefix
     } = TxDb,
-
-    {Start, End} = id_idx_range(DbPrefix, Sig, DocId),
-    ok = erlfdb:clear_range(Tx, Start, End).
-
-
-clear_map_idx(TxDb, Sig, ViewId, DocId, ViewKeys) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-
-    lists:foreach(fun(ViewKey) ->
-        {Start, End} = map_idx_range(DbPrefix, Sig, ViewId, ViewKey, DocId),
-        ok = erlfdb:clear_range(Tx, Start, End)
-    end, ViewKeys).
-
-
-update_id_idx(TxDb, Sig, ViewId, DocId, [], _KVSize) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-    Key = id_idx_key(DbPrefix, Sig, DocId, ViewId),
-    ok = erlfdb:clear(Tx, Key);
-
-update_id_idx(TxDb, Sig, ViewId, DocId, NewRows, KVSize) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-
-    Unique = lists:usort([K || {K, _V} <- NewRows]),
-
-    Key = id_idx_key(DbPrefix, Sig, DocId, ViewId),
-    Val = couch_views_encoding:encode([length(NewRows), KVSize, Unique]),
-    ok = erlfdb:set(Tx, Key, aegis:encrypt(TxDb, Key, Val)).
-
-
-update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-
-    lists:foreach(fun(RemKey) ->
-        {Start, End} = map_idx_range(DbPrefix, Sig, ViewId, RemKey, DocId),
-        ok = erlfdb:clear_range(Tx, Start, End)
-    end, ExistingKeys),
-
-    KVsToAdd = process_rows(NewRows),
-    MapIdxPrefix = map_idx_prefix(DbPrefix, Sig, ViewId),
-
-    lists:foreach(fun({DupeId, Key1, Key2, EV}) ->
-        KK = map_idx_key(MapIdxPrefix, {Key1, DocId}, DupeId),
-        Val = erlfdb_tuple:pack({Key2, EV}),
-        ok = erlfdb:set(Tx, KK, aegis:encrypt(TxDb, KK, Val))
-    end, KVsToAdd).
-
-
-get_view_keys(TxDb, Sig, DocId) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-    {Start, End} = id_idx_range(DbPrefix, Sig, DocId),
-    lists:map(fun({K, V}) ->
-        {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_ID_RANGE, DocId, ViewId} =
-                erlfdb_tuple:unpack(K, DbPrefix),
-        [TotalKeys, TotalSize, UniqueKeys] = couch_views_encoding:decode(V),
-        {ViewId, TotalKeys, TotalSize, UniqueKeys}
-    end, aegis:decrypt(TxDb, erlfdb:get_range(Tx, Start, End, []))).
+    Prefix = id_tree_prefix(DbPrefix, Sig),
+    TreeOpts = [
+        {persist_fun, fun persist_chunks/3},
+        {cache_fun, create_cache_fun(id_tree)}
+    ],
+    ebtree:open(Tx, Prefix, get_order(id_btree), TreeOpts).
 
 
-update_row_count(TxDb, Sig, ViewId, Increment) ->
+open_view_tree(TxDb, Sig, _Lang, View) ->
     #{
         tx := Tx,
         db_prefix := DbPrefix
     } = TxDb,
-    Key = row_count_key(DbPrefix, Sig, ViewId),
-    erlfdb:add(Tx, Key, Increment).
-
+    #mrview{
+        id_num = ViewId
+    } = View,
+    Prefix = view_tree_prefix(DbPrefix, Sig, ViewId),
+    TreeOpts = [
+        {collate_fun, couch_views_util:collate_fun(View)},
+        {reduce_fun, make_reduce_fun(View)},
+        {persist_fun, fun persist_chunks/3},
+        {cache_fun, create_cache_fun({view, ViewId})}
+    ],
+    View#mrview{
+        btree = ebtree:open(Tx, Prefix, get_order(view_btree), TreeOpts)
+    }.
 
-update_kv_size(TxDb, Sig, ViewId, Increment) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
 
-    % Track a view specific size for calls to
-    % GET /dbname/_design/doc/_info`
-    IdxKey = kv_size_key(DbPrefix, Sig, ViewId),
-    erlfdb:add(Tx, IdxKey, Increment),
+get_order(id_btree) ->
+    min_order(config:get_integer("couch_views", "id_btree_node_size", 100));
+get_order(view_btree) ->
+    min_order(config:get_integer("couch_views", "view_btree_node_size", 100)).
+
+
+min_order(V) when is_integer(V), V < 2 ->
+    2;
+min_order(V) when is_integer(V), V rem 2 == 0 ->
+    V;
+min_order(V) ->
+    V + 1.
+
+
+make_reduce_fun(#mrview{}) ->
+    fun
+        (KVs, _ReReduce = false) ->
+            TotalSize = lists:foldl(fun({K, V}, Acc) ->
+                KSize = couch_ejson_size:encoded_size(K),
+                VSize = case V of
+                    {dups, Dups} ->
+                        lists:foldl(fun(D, DAcc) ->
+                            DAcc + couch_ejson_size:encoded_size(D)
+                        end, 0, Dups);
+                    _ ->
+                        couch_ejson_size:encoded_size(V)
+                end,
+                KSize + VSize + Acc
+            end, 0, KVs),
+            {length(KVs), TotalSize};
+        (KRs, _ReReduce = true) ->
+            lists:foldl(fun({Count, Size}, {CountAcc, SizeAcc}) ->
+                {Count + CountAcc, Size + SizeAcc}
+            end, {0, 0}, KRs)
+    end.
 
-    % Track a database level rollup for calls to
-    % GET /dbname
-    DbKey = db_kv_size_key(DbPrefix),
-    erlfdb:add(Tx, DbKey, Increment).
 
+persist_chunks(Tx, set, [Key, Value]) ->
+    Chunks = fabric2_fdb:chunkify_binary(Value),
+    LastId = lists:foldl(fun(Chunk, Id) ->
+        ChunkKey = erlfdb_tuple:pack({Id}, Key),
+        erlfdb:set(Tx, ChunkKey, Chunk),
+        Id + 1
+    end, 0, Chunks),
+
+    % We update nodes in place, so its possible that
+    % a node shrank. This clears any keys that we haven't
+    % just overwritten for the provided key.
+    LastIdKey = erlfdb_tuple:pack({LastId}, Key),
+    EndRange = <<Key/binary, 16#FF>>,
+    erlfdb:clear_range(Tx, LastIdKey, EndRange);
+
+persist_chunks(Tx, get, Key) ->
+    Rows = erlfdb:get_range_startswith(Tx, Key),
+    Values = [V || {_K, V} <- Rows],
+    iolist_to_binary(Values);
+
+persist_chunks(Tx, clear, Key) ->
+    erlfdb:clear_range_startswith(Tx, Key).
+
+
+create_cache_fun(TreeId) ->
+    CacheTid = case get(TreeId) of
+        undefined ->
+            Tid = ets:new(?MODULE, [protected, set]),
+            put(TreeId, {ebtree_cache, Tid}),
+            Tid;
+        {ebtree_cache, Tid} ->
+            Tid
+    end,
+    fun
+        (set, [Id, Node]) ->
+            true = ets:insert_new(CacheTid, {Id, Node}),
+            ok;
+        (clear, Id) ->
+            ets:delete(CacheTid, Id),
+            ok;
+        (get, Id) ->
+            case ets:lookup(CacheTid, Id) of
+                [{Id, Node}] -> Node;
+                [] -> undefined
+            end
+    end.
 
-seq_key(DbPrefix, Sig) ->
-    Key = {?DB_VIEWS, ?VIEW_INFO, ?VIEW_UPDATE_SEQ, Sig},
-    erlfdb_tuple:pack(Key, DbPrefix).
 
+to_map_opts(Options) ->

Review comment:
       This also shouldn't be in this module.  

##########
File path: src/couch_views/src/couch_views_fdb.erl
##########
@@ -244,200 +290,284 @@ clear_index(Db, Signature) ->
     end, Keys),
 
     % Clear index data
-    RangeTuple = {?DB_VIEWS, ?VIEW_DATA, Signature},
-    RangePrefix = erlfdb_tuple:pack(RangeTuple, DbPrefix),
-    erlfdb:clear_range_startswith(Tx, RangePrefix).
+    DataTuple = {?DB_VIEWS, ?VIEW_DATA, Signature},
+    DataPrefix = erlfdb_tuple:pack(DataTuple, DbPrefix),
+    erlfdb:clear_range_startswith(Tx, DataPrefix),
 
+    % Clear tree data
+    TreeTuple = {?DB_VIEWS, ?VIEW_TREES, Signature},
+    TreePrefix = erlfdb_tuple:pack(TreeTuple, DbPrefix),
+    erlfdb:clear_range_startswith(Tx, TreePrefix).
 
-% For each row in a map view we store the the key/value
-% in FoundationDB:
-%
-%   `(EncodedSortKey, (EncodedKey, EncodedValue))`
-%
-% The difference between `EncodedSortKey` and `EndcodedKey` is
-% the use of `couch_util:get_sort_key/1` which turns UTF-8
-% strings into binaries that are byte comparable. Given a sort
-% key binary we cannot recover the input so to return unmodified
-% user data we are forced to store the original.
 
-fold_fwd({RowKey, PackedKeyValue}, Acc) ->
-    #{
-        prefix := Prefix,
-        callback := UserCallback,
-        acc := UserAcc0
-    } = Acc,
-
-    {{_SortKey, DocId}, _DupeId} =
-            erlfdb_tuple:unpack(RowKey, Prefix),
-
-    {EncodedOriginalKey, EncodedValue} = erlfdb_tuple:unpack(PackedKeyValue),
-    Value = couch_views_encoding:decode(EncodedValue),
-    Key = couch_views_encoding:decode(EncodedOriginalKey),
-
-    UserAcc1 = UserCallback(DocId, Key, Value, UserAcc0),
-
-    Acc#{
-        acc := UserAcc1
-    }.
-
-
-clear_id_idx(TxDb, Sig, DocId) ->
+open_id_tree(TxDb, Sig) ->
     #{
         tx := Tx,
         db_prefix := DbPrefix
     } = TxDb,
-
-    {Start, End} = id_idx_range(DbPrefix, Sig, DocId),
-    ok = erlfdb:clear_range(Tx, Start, End).
-
-
-clear_map_idx(TxDb, Sig, ViewId, DocId, ViewKeys) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-
-    lists:foreach(fun(ViewKey) ->
-        {Start, End} = map_idx_range(DbPrefix, Sig, ViewId, ViewKey, DocId),
-        ok = erlfdb:clear_range(Tx, Start, End)
-    end, ViewKeys).
-
-
-update_id_idx(TxDb, Sig, ViewId, DocId, [], _KVSize) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-    Key = id_idx_key(DbPrefix, Sig, DocId, ViewId),
-    ok = erlfdb:clear(Tx, Key);
-
-update_id_idx(TxDb, Sig, ViewId, DocId, NewRows, KVSize) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-
-    Unique = lists:usort([K || {K, _V} <- NewRows]),
-
-    Key = id_idx_key(DbPrefix, Sig, DocId, ViewId),
-    Val = couch_views_encoding:encode([length(NewRows), KVSize, Unique]),
-    ok = erlfdb:set(Tx, Key, aegis:encrypt(TxDb, Key, Val)).
-
-
-update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-
-    lists:foreach(fun(RemKey) ->
-        {Start, End} = map_idx_range(DbPrefix, Sig, ViewId, RemKey, DocId),
-        ok = erlfdb:clear_range(Tx, Start, End)
-    end, ExistingKeys),
-
-    KVsToAdd = process_rows(NewRows),
-    MapIdxPrefix = map_idx_prefix(DbPrefix, Sig, ViewId),
-
-    lists:foreach(fun({DupeId, Key1, Key2, EV}) ->
-        KK = map_idx_key(MapIdxPrefix, {Key1, DocId}, DupeId),
-        Val = erlfdb_tuple:pack({Key2, EV}),
-        ok = erlfdb:set(Tx, KK, aegis:encrypt(TxDb, KK, Val))
-    end, KVsToAdd).
-
-
-get_view_keys(TxDb, Sig, DocId) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-    {Start, End} = id_idx_range(DbPrefix, Sig, DocId),
-    lists:map(fun({K, V}) ->
-        {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_ID_RANGE, DocId, ViewId} =
-                erlfdb_tuple:unpack(K, DbPrefix),
-        [TotalKeys, TotalSize, UniqueKeys] = couch_views_encoding:decode(V),
-        {ViewId, TotalKeys, TotalSize, UniqueKeys}
-    end, aegis:decrypt(TxDb, erlfdb:get_range(Tx, Start, End, []))).
+    Prefix = id_tree_prefix(DbPrefix, Sig),
+    TreeOpts = [
+        {persist_fun, fun persist_chunks/3},
+        {cache_fun, create_cache_fun(id_tree)}
+    ],
+    ebtree:open(Tx, Prefix, get_order(id_btree), TreeOpts).
 
 
-update_row_count(TxDb, Sig, ViewId, Increment) ->
+open_view_tree(TxDb, Sig, _Lang, View) ->
     #{
         tx := Tx,
         db_prefix := DbPrefix
     } = TxDb,
-    Key = row_count_key(DbPrefix, Sig, ViewId),
-    erlfdb:add(Tx, Key, Increment).
-
+    #mrview{
+        id_num = ViewId
+    } = View,
+    Prefix = view_tree_prefix(DbPrefix, Sig, ViewId),
+    TreeOpts = [
+        {collate_fun, couch_views_util:collate_fun(View)},
+        {reduce_fun, make_reduce_fun(View)},
+        {persist_fun, fun persist_chunks/3},
+        {cache_fun, create_cache_fun({view, ViewId})}
+    ],
+    View#mrview{
+        btree = ebtree:open(Tx, Prefix, get_order(view_btree), TreeOpts)
+    }.
 
-update_kv_size(TxDb, Sig, ViewId, Increment) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
 
-    % Track a view specific size for calls to
-    % GET /dbname/_design/doc/_info`
-    IdxKey = kv_size_key(DbPrefix, Sig, ViewId),
-    erlfdb:add(Tx, IdxKey, Increment),
+get_order(id_btree) ->
+    min_order(config:get_integer("couch_views", "id_btree_node_size", 100));
+get_order(view_btree) ->
+    min_order(config:get_integer("couch_views", "view_btree_node_size", 100)).
+
+
+min_order(V) when is_integer(V), V < 2 ->
+    2;
+min_order(V) when is_integer(V), V rem 2 == 0 ->
+    V;
+min_order(V) ->
+    V + 1.
+
+
+make_reduce_fun(#mrview{}) ->
+    fun
+        (KVs, _ReReduce = false) ->
+            TotalSize = lists:foldl(fun({K, V}, Acc) ->
+                KSize = couch_ejson_size:encoded_size(K),
+                VSize = case V of
+                    {dups, Dups} ->
+                        lists:foldl(fun(D, DAcc) ->
+                            DAcc + couch_ejson_size:encoded_size(D)
+                        end, 0, Dups);
+                    _ ->
+                        couch_ejson_size:encoded_size(V)
+                end,
+                KSize + VSize + Acc
+            end, 0, KVs),
+            {length(KVs), TotalSize};
+        (KRs, _ReReduce = true) ->
+            lists:foldl(fun({Count, Size}, {CountAcc, SizeAcc}) ->
+                {Count + CountAcc, Size + SizeAcc}
+            end, {0, 0}, KRs)
+    end.
 
-    % Track a database level rollup for calls to
-    % GET /dbname
-    DbKey = db_kv_size_key(DbPrefix),
-    erlfdb:add(Tx, DbKey, Increment).
 
+persist_chunks(Tx, set, [Key, Value]) ->
+    Chunks = fabric2_fdb:chunkify_binary(Value),
+    LastId = lists:foldl(fun(Chunk, Id) ->
+        ChunkKey = erlfdb_tuple:pack({Id}, Key),
+        erlfdb:set(Tx, ChunkKey, Chunk),
+        Id + 1
+    end, 0, Chunks),
+
+    % We update nodes in place, so its possible that
+    % a node shrank. This clears any keys that we haven't
+    % just overwritten for the provided key.
+    LastIdKey = erlfdb_tuple:pack({LastId}, Key),
+    EndRange = <<Key/binary, 16#FF>>,
+    erlfdb:clear_range(Tx, LastIdKey, EndRange);
+
+persist_chunks(Tx, get, Key) ->
+    Rows = erlfdb:get_range_startswith(Tx, Key),
+    Values = [V || {_K, V} <- Rows],
+    iolist_to_binary(Values);
+
+persist_chunks(Tx, clear, Key) ->
+    erlfdb:clear_range_startswith(Tx, Key).
+
+
+create_cache_fun(TreeId) ->

Review comment:
       This shouldn't be in this module. 

##########
File path: src/couch_views/src/couch_views_fdb.erl
##########
@@ -244,200 +290,284 @@ clear_index(Db, Signature) ->
     end, Keys),
 
     % Clear index data
-    RangeTuple = {?DB_VIEWS, ?VIEW_DATA, Signature},
-    RangePrefix = erlfdb_tuple:pack(RangeTuple, DbPrefix),
-    erlfdb:clear_range_startswith(Tx, RangePrefix).
+    DataTuple = {?DB_VIEWS, ?VIEW_DATA, Signature},
+    DataPrefix = erlfdb_tuple:pack(DataTuple, DbPrefix),
+    erlfdb:clear_range_startswith(Tx, DataPrefix),
 
+    % Clear tree data
+    TreeTuple = {?DB_VIEWS, ?VIEW_TREES, Signature},
+    TreePrefix = erlfdb_tuple:pack(TreeTuple, DbPrefix),
+    erlfdb:clear_range_startswith(Tx, TreePrefix).
 
-% For each row in a map view we store the the key/value
-% in FoundationDB:
-%
-%   `(EncodedSortKey, (EncodedKey, EncodedValue))`
-%
-% The difference between `EncodedSortKey` and `EndcodedKey` is
-% the use of `couch_util:get_sort_key/1` which turns UTF-8
-% strings into binaries that are byte comparable. Given a sort
-% key binary we cannot recover the input so to return unmodified
-% user data we are forced to store the original.
 
-fold_fwd({RowKey, PackedKeyValue}, Acc) ->
-    #{
-        prefix := Prefix,
-        callback := UserCallback,
-        acc := UserAcc0
-    } = Acc,
-
-    {{_SortKey, DocId}, _DupeId} =
-            erlfdb_tuple:unpack(RowKey, Prefix),
-
-    {EncodedOriginalKey, EncodedValue} = erlfdb_tuple:unpack(PackedKeyValue),
-    Value = couch_views_encoding:decode(EncodedValue),
-    Key = couch_views_encoding:decode(EncodedOriginalKey),
-
-    UserAcc1 = UserCallback(DocId, Key, Value, UserAcc0),
-
-    Acc#{
-        acc := UserAcc1
-    }.
-
-
-clear_id_idx(TxDb, Sig, DocId) ->
+open_id_tree(TxDb, Sig) ->
     #{
         tx := Tx,
         db_prefix := DbPrefix
     } = TxDb,
-
-    {Start, End} = id_idx_range(DbPrefix, Sig, DocId),
-    ok = erlfdb:clear_range(Tx, Start, End).
-
-
-clear_map_idx(TxDb, Sig, ViewId, DocId, ViewKeys) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-
-    lists:foreach(fun(ViewKey) ->
-        {Start, End} = map_idx_range(DbPrefix, Sig, ViewId, ViewKey, DocId),
-        ok = erlfdb:clear_range(Tx, Start, End)
-    end, ViewKeys).
-
-
-update_id_idx(TxDb, Sig, ViewId, DocId, [], _KVSize) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-    Key = id_idx_key(DbPrefix, Sig, DocId, ViewId),
-    ok = erlfdb:clear(Tx, Key);
-
-update_id_idx(TxDb, Sig, ViewId, DocId, NewRows, KVSize) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-
-    Unique = lists:usort([K || {K, _V} <- NewRows]),
-
-    Key = id_idx_key(DbPrefix, Sig, DocId, ViewId),
-    Val = couch_views_encoding:encode([length(NewRows), KVSize, Unique]),
-    ok = erlfdb:set(Tx, Key, aegis:encrypt(TxDb, Key, Val)).
-
-
-update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-
-    lists:foreach(fun(RemKey) ->
-        {Start, End} = map_idx_range(DbPrefix, Sig, ViewId, RemKey, DocId),
-        ok = erlfdb:clear_range(Tx, Start, End)
-    end, ExistingKeys),
-
-    KVsToAdd = process_rows(NewRows),
-    MapIdxPrefix = map_idx_prefix(DbPrefix, Sig, ViewId),
-
-    lists:foreach(fun({DupeId, Key1, Key2, EV}) ->
-        KK = map_idx_key(MapIdxPrefix, {Key1, DocId}, DupeId),
-        Val = erlfdb_tuple:pack({Key2, EV}),
-        ok = erlfdb:set(Tx, KK, aegis:encrypt(TxDb, KK, Val))
-    end, KVsToAdd).
-
-
-get_view_keys(TxDb, Sig, DocId) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-    {Start, End} = id_idx_range(DbPrefix, Sig, DocId),
-    lists:map(fun({K, V}) ->
-        {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_ID_RANGE, DocId, ViewId} =
-                erlfdb_tuple:unpack(K, DbPrefix),
-        [TotalKeys, TotalSize, UniqueKeys] = couch_views_encoding:decode(V),
-        {ViewId, TotalKeys, TotalSize, UniqueKeys}
-    end, aegis:decrypt(TxDb, erlfdb:get_range(Tx, Start, End, []))).
+    Prefix = id_tree_prefix(DbPrefix, Sig),
+    TreeOpts = [
+        {persist_fun, fun persist_chunks/3},
+        {cache_fun, create_cache_fun(id_tree)}
+    ],
+    ebtree:open(Tx, Prefix, get_order(id_btree), TreeOpts).
 
 
-update_row_count(TxDb, Sig, ViewId, Increment) ->
+open_view_tree(TxDb, Sig, _Lang, View) ->
     #{
         tx := Tx,
         db_prefix := DbPrefix
     } = TxDb,
-    Key = row_count_key(DbPrefix, Sig, ViewId),
-    erlfdb:add(Tx, Key, Increment).
-
+    #mrview{
+        id_num = ViewId
+    } = View,
+    Prefix = view_tree_prefix(DbPrefix, Sig, ViewId),
+    TreeOpts = [
+        {collate_fun, couch_views_util:collate_fun(View)},
+        {reduce_fun, make_reduce_fun(View)},
+        {persist_fun, fun persist_chunks/3},
+        {cache_fun, create_cache_fun({view, ViewId})}
+    ],
+    View#mrview{
+        btree = ebtree:open(Tx, Prefix, get_order(view_btree), TreeOpts)
+    }.
 
-update_kv_size(TxDb, Sig, ViewId, Increment) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
 
-    % Track a view specific size for calls to
-    % GET /dbname/_design/doc/_info`
-    IdxKey = kv_size_key(DbPrefix, Sig, ViewId),
-    erlfdb:add(Tx, IdxKey, Increment),
+get_order(id_btree) ->
+    min_order(config:get_integer("couch_views", "id_btree_node_size", 100));
+get_order(view_btree) ->
+    min_order(config:get_integer("couch_views", "view_btree_node_size", 100)).
+
+
+min_order(V) when is_integer(V), V < 2 ->
+    2;
+min_order(V) when is_integer(V), V rem 2 == 0 ->
+    V;
+min_order(V) ->
+    V + 1.
+
+
+make_reduce_fun(#mrview{}) ->
+    fun
+        (KVs, _ReReduce = false) ->
+            TotalSize = lists:foldl(fun({K, V}, Acc) ->
+                KSize = couch_ejson_size:encoded_size(K),
+                VSize = case V of
+                    {dups, Dups} ->
+                        lists:foldl(fun(D, DAcc) ->
+                            DAcc + couch_ejson_size:encoded_size(D)
+                        end, 0, Dups);
+                    _ ->
+                        couch_ejson_size:encoded_size(V)
+                end,
+                KSize + VSize + Acc
+            end, 0, KVs),
+            {length(KVs), TotalSize};
+        (KRs, _ReReduce = true) ->
+            lists:foldl(fun({Count, Size}, {CountAcc, SizeAcc}) ->
+                {Count + CountAcc, Size + SizeAcc}
+            end, {0, 0}, KRs)
+    end.
 
-    % Track a database level rollup for calls to
-    % GET /dbname
-    DbKey = db_kv_size_key(DbPrefix),
-    erlfdb:add(Tx, DbKey, Increment).
 
+persist_chunks(Tx, set, [Key, Value]) ->
+    Chunks = fabric2_fdb:chunkify_binary(Value),
+    LastId = lists:foldl(fun(Chunk, Id) ->
+        ChunkKey = erlfdb_tuple:pack({Id}, Key),
+        erlfdb:set(Tx, ChunkKey, Chunk),
+        Id + 1
+    end, 0, Chunks),
+
+    % We update nodes in place, so its possible that
+    % a node shrank. This clears any keys that we haven't
+    % just overwritten for the provided key.
+    LastIdKey = erlfdb_tuple:pack({LastId}, Key),
+    EndRange = <<Key/binary, 16#FF>>,
+    erlfdb:clear_range(Tx, LastIdKey, EndRange);
+
+persist_chunks(Tx, get, Key) ->
+    Rows = erlfdb:get_range_startswith(Tx, Key),
+    Values = [V || {_K, V} <- Rows],
+    iolist_to_binary(Values);
+
+persist_chunks(Tx, clear, Key) ->
+    erlfdb:clear_range_startswith(Tx, Key).
+
+
+create_cache_fun(TreeId) ->
+    CacheTid = case get(TreeId) of
+        undefined ->
+            Tid = ets:new(?MODULE, [protected, set]),
+            put(TreeId, {ebtree_cache, Tid}),
+            Tid;
+        {ebtree_cache, Tid} ->
+            Tid
+    end,
+    fun
+        (set, [Id, Node]) ->
+            true = ets:insert_new(CacheTid, {Id, Node}),
+            ok;
+        (clear, Id) ->
+            ets:delete(CacheTid, Id),
+            ok;
+        (get, Id) ->
+            case ets:lookup(CacheTid, Id) of
+                [{Id, Node}] -> Node;
+                [] -> undefined
+            end
+    end.
 
-seq_key(DbPrefix, Sig) ->
-    Key = {?DB_VIEWS, ?VIEW_INFO, ?VIEW_UPDATE_SEQ, Sig},
-    erlfdb_tuple:pack(Key, DbPrefix).
 
+to_map_opts(Options) ->
+    Dir = case lists:keyfind(dir, 1, Options) of
+        {dir, D} -> D;
+        _ -> fwd
+    end,
+
+    InclusiveEnd = case lists:keyfind(inclusive_end, 1, Options) of
+        {inclusive_end, IE} -> IE;
+        _ -> true
+    end,
+
+    StartKey = case lists:keyfind(start_key, 1, Options) of
+        {start_key, SK} -> SK;
+        false when Dir == fwd -> ebtree:min();
+        false when Dir == rev -> ebtree:max()
+    end,
+
+    EndKey = case lists:keyfind(end_key, 1, Options) of
+        {end_key, EK} -> EK;
+        false when Dir == fwd -> ebtree:max();
+        false when Dir == rev -> ebtree:min()
+    end,
+
+    {Dir, StartKey, EndKey, InclusiveEnd}.
+
+
+gather_update_info(Tx, Mrst, Docs) ->

Review comment:
       This should also go in another module it doesn't use fdb. Also could you add a comment on what is happening here. This is a pretty complex function to follow or It might be worth breaking this up into multiple functions that give a more of an idea of that is happening. 

##########
File path: src/couch_views/src/couch_views_fdb.erl
##########
@@ -682,6 +767,22 @@ combine_vals(V1, V2) ->
     {dups, [V1, V2]}.
 
 
+expand_dupes([]) ->

Review comment:
       This should also go in a different module

##########
File path: src/couch_views/src/couch_views_fdb.erl
##########
@@ -244,200 +290,284 @@ clear_index(Db, Signature) ->
     end, Keys),
 
     % Clear index data
-    RangeTuple = {?DB_VIEWS, ?VIEW_DATA, Signature},
-    RangePrefix = erlfdb_tuple:pack(RangeTuple, DbPrefix),
-    erlfdb:clear_range_startswith(Tx, RangePrefix).
+    DataTuple = {?DB_VIEWS, ?VIEW_DATA, Signature},
+    DataPrefix = erlfdb_tuple:pack(DataTuple, DbPrefix),
+    erlfdb:clear_range_startswith(Tx, DataPrefix),
 
+    % Clear tree data
+    TreeTuple = {?DB_VIEWS, ?VIEW_TREES, Signature},
+    TreePrefix = erlfdb_tuple:pack(TreeTuple, DbPrefix),
+    erlfdb:clear_range_startswith(Tx, TreePrefix).
 
-% For each row in a map view we store the the key/value
-% in FoundationDB:
-%
-%   `(EncodedSortKey, (EncodedKey, EncodedValue))`
-%
-% The difference between `EncodedSortKey` and `EndcodedKey` is
-% the use of `couch_util:get_sort_key/1` which turns UTF-8
-% strings into binaries that are byte comparable. Given a sort
-% key binary we cannot recover the input so to return unmodified
-% user data we are forced to store the original.
 
-fold_fwd({RowKey, PackedKeyValue}, Acc) ->
-    #{
-        prefix := Prefix,
-        callback := UserCallback,
-        acc := UserAcc0
-    } = Acc,
-
-    {{_SortKey, DocId}, _DupeId} =
-            erlfdb_tuple:unpack(RowKey, Prefix),
-
-    {EncodedOriginalKey, EncodedValue} = erlfdb_tuple:unpack(PackedKeyValue),
-    Value = couch_views_encoding:decode(EncodedValue),
-    Key = couch_views_encoding:decode(EncodedOriginalKey),
-
-    UserAcc1 = UserCallback(DocId, Key, Value, UserAcc0),
-
-    Acc#{
-        acc := UserAcc1
-    }.
-
-
-clear_id_idx(TxDb, Sig, DocId) ->
+open_id_tree(TxDb, Sig) ->
     #{
         tx := Tx,
         db_prefix := DbPrefix
     } = TxDb,
-
-    {Start, End} = id_idx_range(DbPrefix, Sig, DocId),
-    ok = erlfdb:clear_range(Tx, Start, End).
-
-
-clear_map_idx(TxDb, Sig, ViewId, DocId, ViewKeys) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-
-    lists:foreach(fun(ViewKey) ->
-        {Start, End} = map_idx_range(DbPrefix, Sig, ViewId, ViewKey, DocId),
-        ok = erlfdb:clear_range(Tx, Start, End)
-    end, ViewKeys).
-
-
-update_id_idx(TxDb, Sig, ViewId, DocId, [], _KVSize) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-    Key = id_idx_key(DbPrefix, Sig, DocId, ViewId),
-    ok = erlfdb:clear(Tx, Key);
-
-update_id_idx(TxDb, Sig, ViewId, DocId, NewRows, KVSize) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-
-    Unique = lists:usort([K || {K, _V} <- NewRows]),
-
-    Key = id_idx_key(DbPrefix, Sig, DocId, ViewId),
-    Val = couch_views_encoding:encode([length(NewRows), KVSize, Unique]),
-    ok = erlfdb:set(Tx, Key, aegis:encrypt(TxDb, Key, Val)).
-
-
-update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-
-    lists:foreach(fun(RemKey) ->
-        {Start, End} = map_idx_range(DbPrefix, Sig, ViewId, RemKey, DocId),
-        ok = erlfdb:clear_range(Tx, Start, End)
-    end, ExistingKeys),
-
-    KVsToAdd = process_rows(NewRows),
-    MapIdxPrefix = map_idx_prefix(DbPrefix, Sig, ViewId),
-
-    lists:foreach(fun({DupeId, Key1, Key2, EV}) ->
-        KK = map_idx_key(MapIdxPrefix, {Key1, DocId}, DupeId),
-        Val = erlfdb_tuple:pack({Key2, EV}),
-        ok = erlfdb:set(Tx, KK, aegis:encrypt(TxDb, KK, Val))
-    end, KVsToAdd).
-
-
-get_view_keys(TxDb, Sig, DocId) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-    {Start, End} = id_idx_range(DbPrefix, Sig, DocId),
-    lists:map(fun({K, V}) ->
-        {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_ID_RANGE, DocId, ViewId} =
-                erlfdb_tuple:unpack(K, DbPrefix),
-        [TotalKeys, TotalSize, UniqueKeys] = couch_views_encoding:decode(V),
-        {ViewId, TotalKeys, TotalSize, UniqueKeys}
-    end, aegis:decrypt(TxDb, erlfdb:get_range(Tx, Start, End, []))).
+    Prefix = id_tree_prefix(DbPrefix, Sig),
+    TreeOpts = [
+        {persist_fun, fun persist_chunks/3},
+        {cache_fun, create_cache_fun(id_tree)}
+    ],
+    ebtree:open(Tx, Prefix, get_order(id_btree), TreeOpts).
 
 
-update_row_count(TxDb, Sig, ViewId, Increment) ->
+open_view_tree(TxDb, Sig, _Lang, View) ->
     #{
         tx := Tx,
         db_prefix := DbPrefix
     } = TxDb,
-    Key = row_count_key(DbPrefix, Sig, ViewId),
-    erlfdb:add(Tx, Key, Increment).
-
+    #mrview{
+        id_num = ViewId
+    } = View,
+    Prefix = view_tree_prefix(DbPrefix, Sig, ViewId),
+    TreeOpts = [
+        {collate_fun, couch_views_util:collate_fun(View)},
+        {reduce_fun, make_reduce_fun(View)},
+        {persist_fun, fun persist_chunks/3},
+        {cache_fun, create_cache_fun({view, ViewId})}
+    ],
+    View#mrview{
+        btree = ebtree:open(Tx, Prefix, get_order(view_btree), TreeOpts)
+    }.
 
-update_kv_size(TxDb, Sig, ViewId, Increment) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
 
-    % Track a view specific size for calls to
-    % GET /dbname/_design/doc/_info`
-    IdxKey = kv_size_key(DbPrefix, Sig, ViewId),
-    erlfdb:add(Tx, IdxKey, Increment),
+get_order(id_btree) ->
+    min_order(config:get_integer("couch_views", "id_btree_node_size", 100));
+get_order(view_btree) ->
+    min_order(config:get_integer("couch_views", "view_btree_node_size", 100)).
+
+
+min_order(V) when is_integer(V), V < 2 ->
+    2;
+min_order(V) when is_integer(V), V rem 2 == 0 ->
+    V;
+min_order(V) ->
+    V + 1.
+
+
+make_reduce_fun(#mrview{}) ->
+    fun
+        (KVs, _ReReduce = false) ->
+            TotalSize = lists:foldl(fun({K, V}, Acc) ->
+                KSize = couch_ejson_size:encoded_size(K),
+                VSize = case V of
+                    {dups, Dups} ->
+                        lists:foldl(fun(D, DAcc) ->
+                            DAcc + couch_ejson_size:encoded_size(D)
+                        end, 0, Dups);
+                    _ ->
+                        couch_ejson_size:encoded_size(V)
+                end,
+                KSize + VSize + Acc
+            end, 0, KVs),
+            {length(KVs), TotalSize};
+        (KRs, _ReReduce = true) ->
+            lists:foldl(fun({Count, Size}, {CountAcc, SizeAcc}) ->
+                {Count + CountAcc, Size + SizeAcc}
+            end, {0, 0}, KRs)
+    end.
 
-    % Track a database level rollup for calls to
-    % GET /dbname
-    DbKey = db_kv_size_key(DbPrefix),
-    erlfdb:add(Tx, DbKey, Increment).
 
+persist_chunks(Tx, set, [Key, Value]) ->
+    Chunks = fabric2_fdb:chunkify_binary(Value),
+    LastId = lists:foldl(fun(Chunk, Id) ->
+        ChunkKey = erlfdb_tuple:pack({Id}, Key),
+        erlfdb:set(Tx, ChunkKey, Chunk),
+        Id + 1
+    end, 0, Chunks),
+
+    % We update nodes in place, so its possible that
+    % a node shrank. This clears any keys that we haven't
+    % just overwritten for the provided key.
+    LastIdKey = erlfdb_tuple:pack({LastId}, Key),
+    EndRange = <<Key/binary, 16#FF>>,
+    erlfdb:clear_range(Tx, LastIdKey, EndRange);
+
+persist_chunks(Tx, get, Key) ->
+    Rows = erlfdb:get_range_startswith(Tx, Key),
+    Values = [V || {_K, V} <- Rows],
+    iolist_to_binary(Values);
+
+persist_chunks(Tx, clear, Key) ->
+    erlfdb:clear_range_startswith(Tx, Key).
+
+
+create_cache_fun(TreeId) ->
+    CacheTid = case get(TreeId) of
+        undefined ->
+            Tid = ets:new(?MODULE, [protected, set]),
+            put(TreeId, {ebtree_cache, Tid}),
+            Tid;
+        {ebtree_cache, Tid} ->
+            Tid
+    end,
+    fun
+        (set, [Id, Node]) ->
+            true = ets:insert_new(CacheTid, {Id, Node}),
+            ok;
+        (clear, Id) ->
+            ets:delete(CacheTid, Id),
+            ok;
+        (get, Id) ->
+            case ets:lookup(CacheTid, Id) of
+                [{Id, Node}] -> Node;
+                [] -> undefined
+            end
+    end.
 
-seq_key(DbPrefix, Sig) ->
-    Key = {?DB_VIEWS, ?VIEW_INFO, ?VIEW_UPDATE_SEQ, Sig},
-    erlfdb_tuple:pack(Key, DbPrefix).
 
+to_map_opts(Options) ->
+    Dir = case lists:keyfind(dir, 1, Options) of
+        {dir, D} -> D;
+        _ -> fwd
+    end,
+
+    InclusiveEnd = case lists:keyfind(inclusive_end, 1, Options) of
+        {inclusive_end, IE} -> IE;
+        _ -> true
+    end,
+
+    StartKey = case lists:keyfind(start_key, 1, Options) of
+        {start_key, SK} -> SK;
+        false when Dir == fwd -> ebtree:min();
+        false when Dir == rev -> ebtree:max()
+    end,
+
+    EndKey = case lists:keyfind(end_key, 1, Options) of
+        {end_key, EK} -> EK;
+        false when Dir == fwd -> ebtree:max();
+        false when Dir == rev -> ebtree:min()
+    end,
+
+    {Dir, StartKey, EndKey, InclusiveEnd}.
+
+
+gather_update_info(Tx, Mrst, Docs) ->
+    % A special token used to indicate that the row should be deleted
+    DeleteRef = erlang:make_ref(),
+
+    AllDocIds = [DocId || #{id := DocId} <- Docs],
+
+    BaseIdMap = lists:foldl(fun(DocId, Acc) ->
+        maps:put(DocId, DeleteRef, Acc)
+    end, #{}, AllDocIds),
+
+    % Build the initial set of rows to delete
+    % ExistingViewKeys is a list of {DocId, [{ViewId, [Key | _]} | _]}
+    ExistingViewKeys = ebtree:lookup_multi(Tx, Mrst#mrst.id_btree, AllDocIds),
+
+    BaseViewMaps = lists:foldl(fun({DocId, ViewIdKeys}, ViewIdAcc1) ->
+        lists:foldl(fun({ViewId, Keys}, ViewIdAcc2) ->
+            OldViewMap = maps:get(ViewId, ViewIdAcc2, #{}),
+            NewViewMap = lists:foldl(fun(Key, ViewMapAcc) ->
+                maps:put({Key, DocId}, DeleteRef, ViewMapAcc)
+            end, OldViewMap, Keys),
+            maps:put(ViewId, NewViewMap, ViewIdAcc2)
+        end, ViewIdAcc1, ViewIdKeys)
+    end, #{}, ExistingViewKeys),
+
+    % Build our base accumulator
+    InfoAcc1 = #{
+        ids => BaseIdMap,
+        views => BaseViewMaps,
+        delete_ref => DeleteRef
+    },
 
-row_count_key(DbPrefix, Sig, ViewId) ->
-    Key = {?DB_VIEWS, ?VIEW_INFO, ?VIEW_ROW_COUNT, Sig, ViewId},
-    erlfdb_tuple:pack(Key, DbPrefix).
+    lists:foldl(fun(Doc, InfoAcc2) ->

Review comment:
       I think it would easier to read if we wrote this as:
   
   ```erlang
      lists:fold(fun
            (#{deleted := true}, InfoAcc2) ->
                  InfoAcc2;
            (Doc, InfoAcc2) ->
              # ... rest goes here
        end).
   ```
   
   

##########
File path: src/couch_views/test/couch_views_red_test.erl
##########
@@ -0,0 +1,764 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_views_red_test).
+
+-include_lib("couch/include/couch_eunit.hrl").
+-include_lib("couch/include/couch_db.hrl").
+-include("couch_views.hrl").
+
+
+-define(TDEF(A), {atom_to_list(A), fun A/1}).
+-define(TDEFI(A), {atom_to_list(A), fun A/0}).
+
+
+with(Tests) ->

Review comment:
       Don't we already have these functions defined in fabric2_test?

##########
File path: src/couch_views/test/couch_views_red_test.erl
##########
@@ -0,0 +1,764 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at

Review comment:
       These tests look pretty comprehensive. I also have these elixir tests https://github.com/apache/couchdb/blob/279b9d95a92fad86f30aa13e0c6b3c5f2757d065/test/elixir/test/reduce_builtin_grouplevel_tests.exs
   
   that might also be worth adding them. 

##########
File path: src/couch_views/src/couch_views_fdb.erl
##########
@@ -126,92 +128,136 @@ set_update_seq(TxDb, Sig, Seq) ->
     ok = erlfdb:set(Tx, seq_key(DbPrefix, Sig), Seq).
 
 
-get_row_count(TxDb, #mrst{sig = Sig}, ViewId) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-
-    case erlfdb:wait(erlfdb:get(Tx, row_count_key(DbPrefix, Sig, ViewId))) of
-        not_found -> 0; % Can this happen?
-        CountBin -> ?bin2uint(CountBin)
-    end.
+set_trees(TxDb, Mrst) ->
+    #mrst{
+        sig = Sig,
+        language = Lang,
+        views = Views
+    } = Mrst,
+    Mrst#mrst{
+        id_btree = open_id_tree(TxDb, Sig),
+        views = [open_view_tree(TxDb, Sig, Lang, V) || V <- Views]
+    }.
 
 
-get_kv_size(TxDb, #mrst{sig = Sig}, ViewId) ->
+get_row_count(TxDb, View) ->
     #{
-        tx := Tx,
-        db_prefix := DbPrefix
+        tx := Tx
     } = TxDb,
-
-    case erlfdb:wait(erlfdb:get(Tx, kv_size_key(DbPrefix, Sig, ViewId))) of
-        not_found -> 0; % Can this happen?
-        SizeBin -> ?bin2uint(SizeBin)
-    end.
+    {Count, _} = ebtree:full_reduce(Tx, View#mrview.btree),
+    Count.
 
 
-fold_map_idx(TxDb, Sig, ViewId, Options, Callback, Acc0) ->
+get_kv_size(TxDb, View) ->
     #{
-        db_prefix := DbPrefix
+        tx := Tx
     } = TxDb,
+    {_, TotalSize} = ebtree:full_reduce(Tx, View#mrview.btree),
+    TotalSize.
 
-    MapIdxPrefix = map_idx_prefix(DbPrefix, Sig, ViewId),
-    FoldAcc = #{
-        prefix => MapIdxPrefix,
-        callback => Callback,
-        acc => Acc0
-        },
-    Fun = aegis:wrap_fold_fun(TxDb, fun fold_fwd/2),
 
+fold_map_idx(TxDb, View, Options, Callback, Acc0) ->
     #{
-        acc := Acc1
-    } = fabric2_fdb:fold_range(TxDb, MapIdxPrefix, Fun, FoldAcc, Options),
-
-    Acc1.
+        tx := Tx
+    } = TxDb,
+    #mrview{
+        btree = Btree
+    } = View,
+
+    CollateFun = couch_views_util:collate_fun(View),
+
+    {Dir, StartKey, EndKey, InclusiveEnd} = to_map_opts(Options),
+
+    Wrapper = fun(KVs0, WAcc) ->
+        % Remove any keys that match Start or End key
+        % depending on direction
+        KVs1 = case InclusiveEnd of
+            true ->
+                KVs0;
+            false when Dir == fwd ->
+                lists:filter(fun({K, _V}) ->
+                    case CollateFun(K, EndKey) of
+                        lt -> true;
+                        eq -> false;
+                        gt -> false
+                    end
+                end, KVs0);
+            false when Dir == rev ->
+                lists:filter(fun({K, _V}) ->
+                    case CollateFun(K, EndKey) of
+                        lt -> false;
+                        eq -> false;
+                        gt -> true
+                    end
+                end, KVs0)
+        end,
+        % Expand dups
+        KVs2 = lists:flatmap(fun({K, V}) ->
+            case V of
+                {dups, Dups} when Dir == fwd ->
+                    [{K, D} || D <- Dups];
+                {dups, Dups} when Dir == rev ->
+                    [{K, D} || D <- lists:reverse(Dups)];
+                _ ->
+                    [{K, V}]
+            end
+        end, KVs1),
+        lists:foldl(fun({{Key, DocId}, Value}, WAccInner) ->
+            Callback(DocId, Key, Value, WAccInner)
+        end, WAcc, KVs2)
+    end,
+
+    case Dir of
+        fwd ->
+            ebtree:range(Tx, Btree, StartKey, EndKey, Wrapper, Acc0);
+        rev ->
+            % Start/End keys swapped on purpose because ebtree
+            ebtree:reverse_range(Tx, Btree, EndKey, StartKey, Wrapper, Acc0)
+    end.
 
 
-write_doc(TxDb, Sig, _ViewIds, #{deleted := true} = Doc) ->
+update_views(TxDb, Mrst, Docs) ->
     #{
-        id := DocId
-    } = Doc,
-
-    ExistingViewKeys = get_view_keys(TxDb, Sig, DocId),
+        tx := Tx
+    } = TxDb,
 
-    clear_id_idx(TxDb, Sig, DocId),
-    lists:foreach(fun({ViewId, TotalKeys, TotalSize, UniqueKeys}) ->
-        clear_map_idx(TxDb, Sig, ViewId, DocId, UniqueKeys),
-        update_row_count(TxDb, Sig, ViewId, -TotalKeys),
-        update_kv_size(TxDb, Sig, ViewId, -TotalSize)
-    end, ExistingViewKeys);
+    % Collect update information
 
-write_doc(TxDb, Sig, ViewIds, Doc) ->
     #{
-        id := DocId,
-        results := Results,
-        kv_sizes := KVSizes
-    } = Doc,
-
-    ExistingViewKeys = get_view_keys(TxDb, Sig, DocId),
-
-    clear_id_idx(TxDb, Sig, DocId),
-
-    lists:foreach(fun({ViewId, NewRows, KVSize}) ->
-        update_id_idx(TxDb, Sig, ViewId, DocId, NewRows, KVSize),
-
-        ExistingKeys = case lists:keyfind(ViewId, 1, ExistingViewKeys) of
-            {ViewId, TotalRows, TotalSize, EKeys} ->
-                RowChange = length(NewRows) - TotalRows,
-                update_row_count(TxDb, Sig, ViewId, RowChange),
-                update_kv_size(TxDb, Sig, ViewId, KVSize - TotalSize),
-                EKeys;
-            false ->
-                RowChange = length(NewRows),
-                update_row_count(TxDb, Sig, ViewId, RowChange),
-                update_kv_size(TxDb, Sig, ViewId, KVSize),
-                []
-        end,
-        update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows)
-    end, lists:zip3(ViewIds, Results, KVSizes)).
+        ids := IdMap,
+        views := ViewMaps,
+        delete_ref := DeleteRef
+    } = gather_update_info(Tx, Mrst, Docs),
+
+    % Generate a list of Keys to delete and Rows to insert from a map
+    UpdateBTree = fun(BTree, Map) ->
+        {ToRemove, ToInsert} = maps:fold(fun(Key, Value, {Keys, Rows}) ->
+            case Value of
+                DeleteRef -> {[Key | Keys], Rows};

Review comment:
       It would be simpler if we deleted the key here instead of looping through it later, and then this fold can return the ones to insert. 

##########
File path: src/couch_views/src/couch_views_fdb.erl
##########
@@ -126,92 +128,136 @@ set_update_seq(TxDb, Sig, Seq) ->
     ok = erlfdb:set(Tx, seq_key(DbPrefix, Sig), Seq).
 
 
-get_row_count(TxDb, #mrst{sig = Sig}, ViewId) ->
-    #{
-        tx := Tx,
-        db_prefix := DbPrefix
-    } = TxDb,
-
-    case erlfdb:wait(erlfdb:get(Tx, row_count_key(DbPrefix, Sig, ViewId))) of
-        not_found -> 0; % Can this happen?
-        CountBin -> ?bin2uint(CountBin)
-    end.
+set_trees(TxDb, Mrst) ->
+    #mrst{
+        sig = Sig,
+        language = Lang,
+        views = Views
+    } = Mrst,
+    Mrst#mrst{
+        id_btree = open_id_tree(TxDb, Sig),
+        views = [open_view_tree(TxDb, Sig, Lang, V) || V <- Views]
+    }.
 
 
-get_kv_size(TxDb, #mrst{sig = Sig}, ViewId) ->
+get_row_count(TxDb, View) ->
     #{
-        tx := Tx,
-        db_prefix := DbPrefix
+        tx := Tx
     } = TxDb,
-
-    case erlfdb:wait(erlfdb:get(Tx, kv_size_key(DbPrefix, Sig, ViewId))) of
-        not_found -> 0; % Can this happen?
-        SizeBin -> ?bin2uint(SizeBin)
-    end.
+    {Count, _} = ebtree:full_reduce(Tx, View#mrview.btree),
+    Count.
 
 
-fold_map_idx(TxDb, Sig, ViewId, Options, Callback, Acc0) ->
+get_kv_size(TxDb, View) ->
     #{
-        db_prefix := DbPrefix
+        tx := Tx
     } = TxDb,
+    {_, TotalSize} = ebtree:full_reduce(Tx, View#mrview.btree),
+    TotalSize.
 
-    MapIdxPrefix = map_idx_prefix(DbPrefix, Sig, ViewId),
-    FoldAcc = #{
-        prefix => MapIdxPrefix,
-        callback => Callback,
-        acc => Acc0
-        },
-    Fun = aegis:wrap_fold_fun(TxDb, fun fold_fwd/2),
 
+fold_map_idx(TxDb, View, Options, Callback, Acc0) ->
     #{
-        acc := Acc1
-    } = fabric2_fdb:fold_range(TxDb, MapIdxPrefix, Fun, FoldAcc, Options),
-
-    Acc1.
+        tx := Tx
+    } = TxDb,
+    #mrview{
+        btree = Btree
+    } = View,
+
+    CollateFun = couch_views_util:collate_fun(View),
+
+    {Dir, StartKey, EndKey, InclusiveEnd} = to_map_opts(Options),
+
+    Wrapper = fun(KVs0, WAcc) ->
+        % Remove any keys that match Start or End key
+        % depending on direction
+        KVs1 = case InclusiveEnd of
+            true ->
+                KVs0;
+            false when Dir == fwd ->
+                lists:filter(fun({K, _V}) ->
+                    case CollateFun(K, EndKey) of
+                        lt -> true;
+                        eq -> false;
+                        gt -> false
+                    end
+                end, KVs0);
+            false when Dir == rev ->
+                lists:filter(fun({K, _V}) ->
+                    case CollateFun(K, EndKey) of
+                        lt -> false;
+                        eq -> false;
+                        gt -> true
+                    end
+                end, KVs0)
+        end,
+        % Expand dups
+        KVs2 = lists:flatmap(fun({K, V}) ->
+            case V of
+                {dups, Dups} when Dir == fwd ->
+                    [{K, D} || D <- Dups];
+                {dups, Dups} when Dir == rev ->
+                    [{K, D} || D <- lists:reverse(Dups)];
+                _ ->
+                    [{K, V}]
+            end
+        end, KVs1),
+        lists:foldl(fun({{Key, DocId}, Value}, WAccInner) ->
+            Callback(DocId, Key, Value, WAccInner)
+        end, WAcc, KVs2)
+    end,
+
+    case Dir of
+        fwd ->
+            ebtree:range(Tx, Btree, StartKey, EndKey, Wrapper, Acc0);
+        rev ->
+            % Start/End keys swapped on purpose because ebtree
+            ebtree:reverse_range(Tx, Btree, EndKey, StartKey, Wrapper, Acc0)
+    end.
 
 
-write_doc(TxDb, Sig, _ViewIds, #{deleted := true} = Doc) ->
+update_views(TxDb, Mrst, Docs) ->
     #{
-        id := DocId
-    } = Doc,
-
-    ExistingViewKeys = get_view_keys(TxDb, Sig, DocId),
+        tx := Tx
+    } = TxDb,
 
-    clear_id_idx(TxDb, Sig, DocId),
-    lists:foreach(fun({ViewId, TotalKeys, TotalSize, UniqueKeys}) ->
-        clear_map_idx(TxDb, Sig, ViewId, DocId, UniqueKeys),
-        update_row_count(TxDb, Sig, ViewId, -TotalKeys),
-        update_kv_size(TxDb, Sig, ViewId, -TotalSize)
-    end, ExistingViewKeys);
+    % Collect update information
 
-write_doc(TxDb, Sig, ViewIds, Doc) ->
     #{
-        id := DocId,
-        results := Results,
-        kv_sizes := KVSizes
-    } = Doc,
-
-    ExistingViewKeys = get_view_keys(TxDb, Sig, DocId),
-
-    clear_id_idx(TxDb, Sig, DocId),
-
-    lists:foreach(fun({ViewId, NewRows, KVSize}) ->
-        update_id_idx(TxDb, Sig, ViewId, DocId, NewRows, KVSize),
-
-        ExistingKeys = case lists:keyfind(ViewId, 1, ExistingViewKeys) of
-            {ViewId, TotalRows, TotalSize, EKeys} ->
-                RowChange = length(NewRows) - TotalRows,
-                update_row_count(TxDb, Sig, ViewId, RowChange),
-                update_kv_size(TxDb, Sig, ViewId, KVSize - TotalSize),
-                EKeys;
-            false ->
-                RowChange = length(NewRows),
-                update_row_count(TxDb, Sig, ViewId, RowChange),
-                update_kv_size(TxDb, Sig, ViewId, KVSize),
-                []
-        end,
-        update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows)
-    end, lists:zip3(ViewIds, Results, KVSizes)).
+        ids := IdMap,
+        views := ViewMaps,
+        delete_ref := DeleteRef
+    } = gather_update_info(Tx, Mrst, Docs),
+
+    % Generate a list of Keys to delete and Rows to insert from a map
+    UpdateBTree = fun(BTree, Map) ->

Review comment:
       Could we make this a proper function instead of an anonymous one?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org