You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by da...@apache.org on 2020/09/03 17:21:56 UTC

[couchdb] branch prototype/fdb-layer-ebtree-improvements created (now 741573e)

This is an automated email from the ASF dual-hosted git repository.

davisp pushed a change to branch prototype/fdb-layer-ebtree-improvements
in repository https://gitbox.apache.org/repos/asf/couchdb.git.


      at 741573e  Implement caching of immutable ebtree nodes

This branch includes the following new commits:

     new 2463a42  Implement ebtree:insert_multi/3
     new 2e7b595  Implement ebtree:lookup_multi/3
     new 741573e  Implement caching of immutable ebtree nodes

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[couchdb] 03/03: Implement caching of immutable ebtree nodes

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

davisp pushed a commit to branch prototype/fdb-layer-ebtree-improvements
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 741573e61f1fed5f56b6de60addce3d37469b6fa
Author: Paul J. Davis <pa...@gmail.com>
AuthorDate: Fri Aug 28 11:33:35 2020 -0500

    Implement caching of immutable ebtree nodes
    
    Inner nodes of the B+Tree are now immutable so that they can be cached.
---
 src/ebtree/src/ebtree.erl | 185 +++++++++++++++++++++++++++++++++++-----------
 1 file changed, 142 insertions(+), 43 deletions(-)

diff --git a/src/ebtree/src/ebtree.erl b/src/ebtree/src/ebtree.erl
index 95e3615..680bf1d 100644
--- a/src/ebtree/src/ebtree.erl
+++ b/src/ebtree/src/ebtree.erl
@@ -49,15 +49,15 @@
     collate_fun,
     reduce_fun,
     encode_fun,
-    persist_fun
+    persist_fun,
+    cache_fun
 }).
 
 -define(META, 0).
 -define(META_ORDER, 0).
--define(META_NEXT_ID, 1).
 
 -define(NODE, 1).
--define(NODE_ROOT_ID, 0).
+-define(NODE_ROOT_ID, <<0>>).
 
 -define(underflow(Tree, Node), Tree#tree.min > length(Node#node.members)).
 -define(at_min(Tree, Node), Tree#tree.min == length(Node#node.members)).
@@ -87,13 +87,15 @@ open(Db, Prefix, Order, Options) when is_binary(Prefix), is_integer(Order), Orde
     CollateFun = proplists:get_value(collate_fun, Options, fun collate_raw/2),
     EncodeFun = proplists:get_value(encode_fun, Options, fun encode_erlang/3),
     PersistFun = proplists:get_value(persist_fun, Options, fun simple_persist/3),
+    CacheFun = proplists:get_value(cache_fun, Options, fun cache_noop/2),
 
     Tree = #tree{
         prefix = Prefix,
         reduce_fun = ReduceFun,
         collate_fun = CollateFun,
         encode_fun = EncodeFun,
-        persist_fun = PersistFun
+        persist_fun = PersistFun,
+        cache_fun = CacheFun
     },
 
     erlfdb:transactional(Db, fun(Tx) ->
@@ -101,7 +103,6 @@ open(Db, Prefix, Order, Options) when is_binary(Prefix), is_integer(Order), Orde
             not_found ->
                 erlfdb:clear_range_startswith(Tx, Prefix),
                 set_meta(Tx, Tree, ?META_ORDER, Order),
-                set_meta(Tx, Tree, ?META_NEXT_ID, 1),
                 set_node(Tx, Tree, #node{id = ?NODE_ROOT_ID}),
                 init_order(Tree, Order);
             ActualOrder when is_integer(ActualOrder) ->
@@ -543,7 +544,7 @@ insert(Db, #tree{} = Tree, Key, Value) ->
         Root0 = get_node(Tx, Tree, ?NODE_ROOT_ID),
         case ?is_full(Tree, Root0) of
             true ->
-                OldRoot = Root0#node{id = new_node_id(Tx, Tree)},
+                OldRoot = Root0#node{id = new_node_id()},
                 FirstKey = first_key(OldRoot),
                 LastKey = last_key(OldRoot),
                 Root1 = #node{
@@ -562,8 +563,8 @@ insert(Db, #tree{} = Tree, Key, Value) ->
 split_child(Tx, #tree{} = Tree, #node{} = Parent0, #node{} = Child) ->
     {LeftMembers, RightMembers} = lists:split(Tree#tree.min, Child#node.members),
 
-    LeftId = new_node_id(Tx, Tree),
-    RightId = new_node_id(Tx, Tree),
+    LeftId = new_node_id(),
+    RightId = new_node_id(),
 
     LeftChild = remove_pointers_if_not_leaf(#node{
         id = LeftId,
@@ -600,9 +601,10 @@ split_child(Tx, #tree{} = Tree, #node{} = Parent0, #node{} = Child) ->
                 umerge_members(Tree, Parent0#node.level, [{FirstRightKey, LastRightKey, RightId, RightReduction}],
                     lists:keydelete(Child#node.id, 3, Parent0#node.members)))
     },
+    Parent2 = new_node_id_if_cacheable(Tx, Tree, Parent0, Parent1),
     clear_node(Tx, Tree, Child),
-    set_nodes(Tx, Tree, [LeftChild, RightChild, Parent1]),
-    {Parent1, LeftChild, RightChild}.
+    set_nodes(Tx, Tree, [LeftChild, RightChild, Parent2]),
+    {Parent2, LeftChild, RightChild}.
 
 
 update_prev_neighbour(_Tx, #tree{} = _Tree, #node{prev = undefined} = _Node) ->
@@ -626,7 +628,7 @@ insert_nonfull(Tx, #tree{} = Tree, #node{level = 0} = Node0, Key, Value) ->
         members = umerge_members(Tree, 0, [{Key, Value}], Node0#node.members)
     },
     set_node(Tx, Tree, Node0, Node1),
-    reduce_node(Tree, Node1);
+    {Node1#node.id, reduce_node(Tree, Node1)};
 
 insert_nonfull(Tx, #tree{} = Tree, #node{} = Node0, Key, Value) ->
     ChildId0 = find_child_id(Tree, Node0, Key),
@@ -646,16 +648,17 @@ insert_nonfull(Tx, #tree{} = Tree, #node{} = Node0, Key, Value) ->
             {Node0, Child0}
     end,
     ChildId1 = Child1#node.id,
-    NewReduction = insert_nonfull(Tx, Tree, Child1, Key, Value),
+    {ChildId2, NewReduction} = insert_nonfull(Tx, Tree, Child1, Key, Value),
     {CurrentFirstKey, CurrentLastKey, ChildId1, _OldReduction} = lists:keyfind(ChildId1, 3, Node1#node.members),
     [NewFirstKey, _] = sort_keys(Tree, [Key, CurrentFirstKey]),
     [_, NewLastKey] = sort_keys(Tree, [Key, CurrentLastKey]),
     Node2 = Node1#node{
         members = lists:keyreplace(ChildId1, 3, Node1#node.members,
-            {NewFirstKey, NewLastKey, ChildId1, NewReduction})
+            {NewFirstKey, NewLastKey, ChildId2, NewReduction})
     },
-    set_node(Tx, Tree, Node0, Node2),
-    reduce_node(Tree, Node2).
+    Node3 = new_node_id_if_cacheable(Tx, Tree, Node0, Node2),
+    set_node(Tx, Tree, Node0, Node3),
+    {Node3#node.id, reduce_node(Tree, Node2)}.
 
 
 %% @doc Inserts or updates multiple values in the ebtree
@@ -715,8 +718,14 @@ split_node_multi(Tx, Tree, Node) ->
         true when Node#node.id == ?NODE_ROOT_ID ->
             Node#node.members;
         true ->
-            set_node(Tx, Tree, Node),
-            [to_member(Tree, Node)];
+            NewNode = case node_is_cacheable(Node) of
+                true ->
+                    Node#node{id = new_node_id()};
+                false ->
+                    Node
+            end,
+            set_node(Tx, Tree, NewNode),
+            [to_member(Tree, NewNode)];
         false ->
             clear_node(Tx, Tree, Node),
             Nodes0 = create_nodes(Tx, Tree, Node),
@@ -763,14 +772,14 @@ create_nodes(Tx, #tree{} = Tree, Node) ->
         true ->
             {Members, Rest} = lists:split(Tree#tree.min, Node#node.members),
             NewNode = #node{
-                id = new_node_id(Tx, Tree),
+                id = new_node_id(),
                 level = Node#node.level,
                 members = Members
             },
             [NewNode | create_nodes(Tx, Tree, Node#node{members = Rest})];
         false ->
             NewNode = #node{
-                id = new_node_id(Tx, Tree),
+                id = new_node_id(),
                 level = Node#node.level,
                 members = Node#node.members
             },
@@ -845,12 +854,12 @@ delete(Tx, #tree{} = Tree, #node{} = Parent0, Key) ->
             Sibling = get_node(Tx, Tree, SiblingId),
             NewNodes = case ?at_min(Tree, Sibling) of
                 true ->
-                    Merged = merge(Tx, Tree, Child1, Sibling),
+                    Merged = merge(Tree, Child1, Sibling),
                     update_prev_neighbour(Tx, Tree, Merged),
                     update_next_neighbour(Tx, Tree, Merged),
                     [Merged];
                 false ->
-                    {Left, Right} = rebalance(Tx, Tree, Child1, Sibling),
+                    {Left, Right} = rebalance(Tree, Child1, Sibling),
                     update_prev_neighbour(Tx, Tree, Left),
                     update_next_neighbour(Tx, Tree, Right),
                     [Left, Right]
@@ -866,28 +875,28 @@ delete(Tx, #tree{} = Tree, #node{} = Parent0, Key) ->
             end, Members2, NewNodes),
 
             Parent1 = Parent0#node{
-                %% TODO change id
                 members = Members3
             },
-
+            Parent2 = new_node_id_if_cacheable(Tx, Tree, Parent0, Parent1),
             clear_nodes(Tx, Tree, [Child0, Sibling]),
             set_nodes(Tx, Tree, NewNodes),
-            Parent1;
+            Parent2;
         false ->
             set_node(Tx, Tree, Child0, Child1),
             {_OldFirstKey, _OldLastKey, ChildId0, _OldReduction} = lists:keyfind(ChildId0, 3, Parent0#node.members),
-            Parent0#node{
+            Parent1 = Parent0#node{
                 members = lists:keyreplace(ChildId0, 3, Parent0#node.members,
                     {first_key(Child1), last_key(Child1), Child1#node.id, reduce_node(Tree, Child1)})
-            }
+            },
+            new_node_id_if_cacheable(Tx, Tree, Parent0, Parent1)
     end.
 
 
-merge(Tx, #tree{} = Tree, #node{level = Level} = Node1, #node{level = Level} = Node2) ->
+merge(#tree{} = Tree, #node{level = Level} = Node1, #node{level = Level} = Node2) ->
     [Left, Right] = sort_nodes(Tree, [Node1, Node2]),
 
     #node{
-        id = new_node_id(Tx, Tree),
+        id = new_node_id(),
         level = Level,
         prev = Left#node.prev,
         next = Right#node.next,
@@ -895,14 +904,14 @@ merge(Tx, #tree{} = Tree, #node{level = Level} = Node1, #node{level = Level} = N
     }.
 
 
-rebalance(Tx, #tree{} = Tree, #node{level = Level} = Node1, #node{level = Level} = Node2) ->
+rebalance(#tree{} = Tree, #node{level = Level} = Node1, #node{level = Level} = Node2) ->
     [Left0, Right0] = sort_nodes(Tree, [Node1, Node2]),
 
     Members = lists:append(Left0#node.members, Right0#node.members),
     {LeftMembers, RightMembers} = lists:split(length(Members) div 2, Members),
 
-    Left1Id = new_node_id(Tx, Tree),
-    Right1Id = new_node_id(Tx, Tree),
+    Left1Id = new_node_id(),
+    Right1Id = new_node_id(),
 
     Left1 = remove_pointers_if_not_leaf(Left0#node{
         id = Left1Id,
@@ -974,10 +983,16 @@ meta_key(Prefix, MetaKey) when is_binary(Prefix) ->
 %% node persistence functions
 
 get_node(Tx, #tree{} = Tree, Id) ->
-    Key = node_key(Tree#tree.prefix, Id),
-    Value = persist(Tree, Tx, get, Key),
-    decode_node(Tree, Id, Key, Value).
-
+    case cache(Tree, get, Id) of
+        undefined ->
+            Key = node_key(Tree#tree.prefix, Id),
+            Value = persist(Tree, Tx, get, Key),
+            Node = decode_node(Tree, Id, Key, Value),
+            cache(Tree, set, [Id, Node]),
+            Node;
+        #node{} = Node ->
+            Node
+    end.
 
 clear_nodes(Tx, #tree{} = Tree, Nodes) ->
     lists:foreach(fun(Node) ->
@@ -987,6 +1002,7 @@ clear_nodes(Tx, #tree{} = Tree, Nodes) ->
 
 clear_node(Tx, #tree{} = Tree, #node{} = Node) ->
      Key = node_key(Tree#tree.prefix, Node#node.id),
+     cache(Tree, clear, Node#node.id),
      persist(Tree, Tx, clear, Key).
 
 
@@ -1007,10 +1023,11 @@ set_node(Tx, #tree{} = Tree, #node{} = Node) ->
     validate_node(Tree, Node),
     Key = node_key(Tree#tree.prefix, Node#node.id),
     Value = encode_node(Tree, Key, Node),
+    cache(Tree, set, [Node#node.id, Node]),
     persist(Tree, Tx, set, [Key, Value]).
 
 
-node_key(Prefix, Id) when is_binary(Prefix), is_integer(Id) ->
+node_key(Prefix, Id) when is_binary(Prefix), is_binary(Id) ->
     erlfdb_tuple:pack({?NODE, Id}, Prefix).
 
 
@@ -1202,7 +1219,7 @@ collate_raw(A, A) ->
 %% encoding function
 
 encode_erlang(encode, _Key, Value) ->
-    term_to_binary(Value, [compressed, {minor_version, 2}]);
+    term_to_binary(Value, [{minor_version, 2}]);
 
 
 encode_erlang(decode, _Key, Value) ->
@@ -1225,6 +1242,37 @@ simple_persist(Tx, clear, Key) ->
     erlfdb:clear(Tx, Key).
 
 
+%% cache functions
+
+cache_noop(set, _) ->
+    ok;
+cache_noop(clear, _) ->
+    ok;
+cache_noop(get, _) ->
+    undefined.
+
+
+cache(#tree{} = Tree, set, [Id, #node{} = Node]) ->
+    #tree{cache_fun = CacheFun} = Tree,
+    case node_is_cacheable(Node) of
+        true ->
+            CacheFun(set, [Id, Node]);
+        false ->
+            ok
+    end;
+
+cache(#tree{} = Tree, clear, Id) ->
+    #tree{cache_fun = CacheFun} = Tree,
+    CacheFun(clear, Id);
+
+cache(#tree{} = _Tree, get, ?NODE_ROOT_ID) ->
+    undefined;
+
+cache(#tree{} = Tree, get, Id) ->
+    #tree{cache_fun = CacheFun} = Tree,
+    CacheFun(get, Id).
+
+
 %% private functions
 
 init_order(#tree{} = Tree, Order)
@@ -1254,10 +1302,30 @@ last_key(Members) when is_list(Members) ->
     end.
 
 
-new_node_id(Tx, Tree) ->
-    NextId = get_meta(Tx, Tree, ?META_NEXT_ID),
-    set_meta(Tx, Tree, ?META_NEXT_ID, NextId + 1),
-    NextId.
+new_node_id_if_cacheable(Tx, #tree{} = Tree, #node{} = Old, #node{} = New) ->
+    MembersChanged = Old#node.members /= New#node.members,
+    NodeIsCacheable = node_is_cacheable(New),
+    if
+        MembersChanged andalso NodeIsCacheable ->
+            clear_node(Tx, Tree, New),
+            New#node{id = new_node_id()};
+        true ->
+            New
+    end.
+
+
+node_is_cacheable(#node{id = ?NODE_ROOT_ID}) ->
+    false;
+
+node_is_cacheable(#node{level = 0}) ->
+    false;
+
+node_is_cacheable(#node{}) ->
+    true.
+
+
+new_node_id() ->
+    crypto:strong_rand_bytes(16).
 
 
 %% remove prev/next pointers for nonleaf nodes
@@ -1268,11 +1336,24 @@ remove_pointers_if_not_leaf(#node{} = Node) ->
     Node#node{prev = undefined, next = undefined}.
 
 
+
+print_node(#node{level = 0} = Node) ->
+    io:format("#node{id = ~s, level = ~w, prev = ~s, next = ~s, members = ~w}~n~n",
+        [b64(Node#node.id), Node#node.level, b64(Node#node.prev), b64(Node#node.next),
+        Node#node.members]);
+
 print_node(#node{} = Node) ->
-    io:format("#node{id = ~w, level = ~w, prev = ~w, next = ~w, members = ~w}~n~n",
-        [Node#node.id, Node#node.level, Node#node.prev, Node#node.next, Node#node.members]).
+    io:format("#node{id = ~s, level = ~w, prev = ~s, next = ~s, members = ~s}~n~n",
+        [base64:encode(Node#node.id), Node#node.level, b64(Node#node.prev), b64(Node#node.next),
+        [io_lib:format("{~w, ~w, ~s, ~w}, ", [F, L, b64(P), R]) || {F, L, P, R} <- Node#node.members]]).
 
 
+b64(undefined) ->
+    undefined;
+
+b64(Bin) ->
+    base64:encode(Bin).
+
 %% tests
 
 -ifdef(TEST).
@@ -1679,4 +1760,22 @@ validate_node_test_() ->
     ].
 
 
+cache_test_() ->
+    {spawn, [fun() ->
+        Db = erlfdb_util:get_test_db([empty]),
+        CacheFun = fun
+            (set, [Id, Node]) ->
+                erlang:put(Id, Node);
+            (clear, Id) ->
+                erlang:erase(Id);
+            (get, Id) ->
+                erlang:get(Id)
+        end,
+        Tree = open(Db, <<1,2,3>>, 4, [{cache_fun, CacheFun}]),
+        [ebtree:insert(Db, Tree, I, I) || I <- lists:seq(1, 16)],
+        ?assertEqual({1, 1}, ebtree:lookup(Db, Tree, 1)),
+        NodeCache = [V || {_K, V} <- erlang:get(), is_record(V, node)],
+        ?assertEqual(3, length(NodeCache))
+    end]}.
+
 -endif.


[couchdb] 02/03: Implement ebtree:lookup_multi/3

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

davisp pushed a commit to branch prototype/fdb-layer-ebtree-improvements
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 2e7b595589f61d41395e511a55a09e2d3b514e01
Author: Paul J. Davis <pa...@gmail.com>
AuthorDate: Fri Aug 21 13:29:42 2020 -0500

    Implement ebtree:lookup_multi/3
    
    This allows looking up multiple keys simultaneously which reduces the
    amount of overhead due to node serialization and collation.
---
 src/ebtree/src/ebtree.erl | 61 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)

diff --git a/src/ebtree/src/ebtree.erl b/src/ebtree/src/ebtree.erl
index 84e3183..95e3615 100644
--- a/src/ebtree/src/ebtree.erl
+++ b/src/ebtree/src/ebtree.erl
@@ -21,6 +21,7 @@
      insert_multi/3,
      delete/3,
      lookup/3,
+     lookup_multi/3,
      range/6,
      reverse_range/6,
      fold/4,
@@ -149,6 +150,55 @@ lookup(Db, #tree{} = Tree, Key) ->
     fold(Db, Tree, Fun, false, []).
 
 
+%% @doc Lookup a list of keys in the ebtree.
+%% @param Db An erlfdb database or transaction.
+%% @param Tree the ebtree.
+%% @param Keys the list of keys to lookup
+%% @returns A list containing key/value tuples for keys that were found
+-spec lookup_multi(Db :: term(), Tree :: #tree{}, Key :: [term()]) ->
+    [{Key :: term(), Value :: term()}].
+lookup_multi(Db, #tree{} = Tree, Keys) ->
+    FoldFun = fun lookup_multi_fold/2,
+    Acc = {Tree, sort_keys(Tree, Keys), []},
+    {_, _, FoundKeys} = fold(Db, Tree, FoldFun, Acc, []),
+    FoundKeys.
+
+
+lookup_multi_fold(_, {_, [], _} = Acc) ->
+    % No more keys to find
+    {stop, Acc};
+
+lookup_multi_fold({visit, Key1, Value}, {Tree, [Key2 | Rest], Acc}) ->
+    {NewKeys, NewAcc} = case collate(Tree, Key1, Key2) of
+        lt ->
+            % Still looking for the next user key
+            {[Key2 | Rest], Acc};
+        eq ->
+            % Found a requested key
+            {Rest, [{Key2, Value} | Acc]};
+        gt ->
+            % The user key wasn't found so we drop it
+            {Rest, Acc}
+    end,
+    {ok, {Tree, NewKeys, NewAcc}};
+
+lookup_multi_fold({traverse, FKey, LKey, R}, {Tree, [UKey | Rest], Acc}) ->
+    case collate(Tree, FKey, UKey, [gt]) of
+        true ->
+            % We've passed by our first user key
+            lookup_multi_fold({traverse, FKey, LKey, R}, {Tree, Rest, Acc});
+        false ->
+            case collate(Tree, UKey, LKey, [lt, eq]) of
+                true ->
+                    % Key might be in this range
+                    {ok, {Tree, [UKey | Rest], Acc}};
+                false ->
+                    % Next key is not in range
+                    {skip, {Tree, [UKey | Rest], Acc}}
+            end
+    end.
+
+
 %% @equiv fold(Db, Tree, Fun, Acc, [])
 fold(Db, #tree{} = Tree, Fun, Acc) ->
     fold(Db, Tree, Fun, Acc, []).
@@ -1308,6 +1358,17 @@ lookup_test() ->
     ?assertEqual(false, lookup(Db, Tree, 101)).
 
 
+lookup_multi_test() ->
+    Db = erlfdb_util:get_test_db([empty]),
+    Tree = open(Db, <<1,2,3>>, 4),
+    Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, 16)])],
+    lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key + 1) end, Keys),
+    validate_tree(Db, Tree),
+    ?assertEqual([{1, 2}], lookup_multi(Db, Tree, [1])),
+    ?assertEqual([{15, 16}, {2, 3}], lookup_multi(Db, Tree, [2, 15])),
+    ?assertEqual([{15, 16}, {4, 5}, {2, 3}], lookup_multi(Db, Tree, [2, 101, 15, 4, -3])).
+
+
 insert_multi_test() ->
     Db = erlfdb_util:get_test_db([empty]),
     Tree = open(Db, <<1, 2, 3>>, 4),


[couchdb] 01/03: Implement ebtree:insert_multi/3

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

davisp pushed a commit to branch prototype/fdb-layer-ebtree-improvements
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 2463a429e168b38cb57f84dd5b25366bf83eff78
Author: Paul J. Davis <pa...@gmail.com>
AuthorDate: Tue Aug 18 14:58:47 2020 -0500

    Implement ebtree:insert_multi/3
    
    This allows for batch insertion of keys in order to minimize node
    serialization and collation costs.
---
 src/ebtree/src/ebtree.erl | 164 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 164 insertions(+)

diff --git a/src/ebtree/src/ebtree.erl b/src/ebtree/src/ebtree.erl
index 536d3b1..84e3183 100644
--- a/src/ebtree/src/ebtree.erl
+++ b/src/ebtree/src/ebtree.erl
@@ -18,6 +18,7 @@
      min/0,
      max/0,
      insert/4,
+     insert_multi/3,
      delete/3,
      lookup/3,
      range/6,
@@ -607,6 +608,155 @@ insert_nonfull(Tx, #tree{} = Tree, #node{} = Node0, Key, Value) ->
     reduce_node(Tree, Node2).
 
 
+%% @doc Inserts or updates multiple values in the ebtree
+%% @param Db An erlfdb database or transaction.
+%% @param Tree The ebtree.
+%% @param KeyValues A list of two-tuples representing the key/values to insert
+%% @returns the tree.
+-spec insert_multi(Db :: term(), Tree :: #tree{}, KeyValues :: [{term(), term()}]) -> #tree{}.
+insert_multi(_Db, #tree{} = Tree, []) ->
+    Tree;
+
+insert_multi(Db, #tree{} = Tree, KeyValues) when is_list(KeyValues) ->
+    % Sort our KeyValues so that we can insert in order
+    SortedKeyValues = usort_members(Tree, 0, KeyValues),
+    erlfdb:transactional(Db, fun(Tx) ->
+        Root0 = get_node(Tx, Tree, ?NODE_ROOT_ID),
+        Members = insert_multi(Tx, Tree, Root0, SortedKeyValues),
+        Root1 = grow_tree(Tx, Tree, Root0#node{members = Members}),
+        set_node(Tx, Tree, Root1)
+    end),
+    Tree.
+
+
+insert_multi(Tx, #tree{} = Tree, #node{level = L} = Node, KeyValues) when L > 0 ->
+    ChildKVsPairs = assign_kvs(Tree, Node#node.members, KeyValues),
+    NewMembers = lists:flatmap(fun({{_F, _L, P, _R} = Child, KVs}) ->
+        case KVs of
+            [] ->
+                [Child];
+            _ ->
+                ChildNode = get_node(Tx, Tree, P),
+                insert_multi(Tx, Tree, ChildNode, KVs)
+        end
+    end, ChildKVsPairs),
+    split_node_multi(Tx, Tree, Node#node{members = NewMembers});
+
+insert_multi(Tx, #tree{} = Tree, #node{level = 0} = Node, KeyValues) ->
+    NewMembers = umerge_members(Tree, 0, KeyValues, Node#node.members),
+    split_node_multi(Tx, Tree, Node#node{members = NewMembers}).
+
+
+assign_kvs(_Tree, [Child], KeyValues) ->
+    [{Child, KeyValues}];
+
+assign_kvs(Tree, [{_F, L, _P, _R} = Child | RestChildren], KeyValues) ->
+    {KVsInChild, RestKVs} = lists:splitwith(fun({Key, _}) ->
+        collate(Tree, Key, L, [lt, eq])
+    end, KeyValues),
+    [{Child, KVsInChild} | assign_kvs(Tree, RestChildren, RestKVs)].
+
+
+split_node_multi(Tx, Tree, Node) ->
+    NumMembers = length(Node#node.members),
+    % Not =< so that we don't leave full nodes
+    % in the tree after update.
+    case NumMembers < Tree#tree.max of
+        true when Node#node.id == ?NODE_ROOT_ID ->
+            Node#node.members;
+        true ->
+            set_node(Tx, Tree, Node),
+            [to_member(Tree, Node)];
+        false ->
+            clear_node(Tx, Tree, Node),
+            Nodes0 = create_nodes(Tx, Tree, Node),
+            Nodes1 = if Node#node.level > 0 -> Nodes0; true ->
+                Nodes2 = update_next_ptrs(Nodes0),
+                Nodes3 = update_prev_ptrs(Nodes2),
+                Nodes4 = set_first_prev_ptr(Tx, Tree, Node#node.prev, Nodes3),
+                set_last_next_ptr(Tx, Tree, Node#node.next, Nodes4)
+            end,
+            set_nodes(Tx, Tree, Nodes1),
+            [to_member(Tree, N) || N <- Nodes1]
+    end.
+
+
+grow_tree(_Tx, _Tree, #node{level = 0, members = [{_, _} | _]} = Root) ->
+    Root;
+
+grow_tree(Tx, Tree, #node{level = 0, members = [{_, _, _, _} | _]} = Root) ->
+    grow_tree(Tx, Tree, Root#node{level = 1});
+
+grow_tree(Tx, Tree, Root) ->
+    case length(Root#node.members) < Tree#tree.max of
+        true ->
+            Root;
+        false ->
+            NewMembers = split_node_multi(Tx, Tree, Root),
+            NewRoot = Root#node{
+                level = Root#node.level + 1,
+                members = NewMembers
+            },
+            grow_tree(Tx, Tree, NewRoot)
+    end.
+
+
+to_member(Tree, Node) ->
+    FirstKey = first_key(Node#node.members),
+    LastKey = last_key(Node#node.members),
+    Reds = reduce_node(Tree, Node),
+    {FirstKey, LastKey, Node#node.id, Reds}.
+
+
+create_nodes(Tx, #tree{} = Tree, Node) ->
+    case length(Node#node.members) >= Tree#tree.max of
+        true ->
+            {Members, Rest} = lists:split(Tree#tree.min, Node#node.members),
+            NewNode = #node{
+                id = new_node_id(Tx, Tree),
+                level = Node#node.level,
+                members = Members
+            },
+            [NewNode | create_nodes(Tx, Tree, Node#node{members = Rest})];
+        false ->
+            NewNode = #node{
+                id = new_node_id(Tx, Tree),
+                level = Node#node.level,
+                members = Node#node.members
+            },
+            [NewNode]
+    end.
+
+
+update_next_ptrs([_] = Nodes) ->
+    Nodes;
+
+update_next_ptrs([N1, N2 | Rest]) ->
+    [N1#node{next = N2#node.id} | update_next_ptrs([N2 | Rest])].
+
+
+update_prev_ptrs([_] = Nodes) ->
+    Nodes;
+
+update_prev_ptrs([N1, N2 | Rest]) ->
+    [N1 | update_prev_ptrs([N2#node{prev = N1#node.id} | Rest])].
+
+
+set_first_prev_ptr(Tx, Tree, Prev, [Node | Rest]) ->
+    NewNode = Node#node{prev = Prev},
+    update_prev_neighbour(Tx, Tree, NewNode),
+    [NewNode | Rest].
+
+
+set_last_next_ptr(Tx, Tree, Next, [Node0]) ->
+    Node1 = Node0#node{next = Next},
+    update_next_neighbour(Tx, Tree, Node1),
+    [Node1];
+
+set_last_next_ptr(Tx, Tree, Next, [N | Rest]) ->
+    [N | set_last_next_ptr(Tx, Tree, Next, Rest)].
+
+
 %% @doc Deletes an entry from the ebtree
 %% @param Db An erlfdb database or transaction.
 %% @param Tree The ebtree.
@@ -1158,6 +1308,20 @@ lookup_test() ->
     ?assertEqual(false, lookup(Db, Tree, 101)).
 
 
+insert_multi_test() ->
+    Db = erlfdb_util:get_test_db([empty]),
+    Tree = open(Db, <<1, 2, 3>>, 4),
+    AllKVs = lists:foldl(fun(_Seq, Acc) ->
+        KVs = [{rand:uniform(), rand:uniform()} || _ <- lists:seq(1, 16)],
+        insert_multi(Db, Tree, KVs),
+        KVs ++ Acc
+    end, [], lists:seq(1, 16)),
+    lists:foreach(fun({K, V}) ->
+        ?assertEqual({K, V}, lookup(Db, Tree, K))
+    end, AllKVs),
+    validate_tree(Db, Tree).
+
+
 delete_test() ->
     Db = erlfdb_util:get_test_db([empty]),
     Tree = open(Db, <<1,2,3>>, 4),