You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by rn...@apache.org on 2014/08/06 18:56:58 UTC

[1/4] couch commit: updated refs/heads/windsor-merge-325 to d75dca1

Repository: couchdb-couch
Updated Branches:
  refs/heads/windsor-merge-325 [created] d75dca18e


Be more specific on the merge result

We have three results that can happen when merging a path into a
revision tree:

 * We can extend a branch which replaces a leaf
 * We can create a new branch which results in a new leaf
 * We can land on an existing internal node

The first result (new_leaf) means that there is no conflict in the
update operation. The second (new_branch) means we branched a revision
path which means the operation introduces a conflict. The third
(internal_node) means that the merge was the result of an edit that
has already been applied to this document.

For the third case we have a subtle special case in that if we have
deleted the document and want to recreate it into the same initial
state we need to give the new state a different revision. The current
code follows the edit path and extends the first deleted leaf it
finds.

BugzID: 25150


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/050fea27
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/050fea27
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/050fea27

Branch: refs/heads/windsor-merge-325
Commit: 050fea27d77fe8282a85aab940e056c92e718fe1
Parents: 2c2f53e
Author: Paul J. Davis <pa...@gmail.com>
Authored: Fri Nov 15 12:13:17 2013 -0600
Committer: Robert Newson <rn...@apache.org>
Committed: Wed Aug 6 15:17:58 2014 +0100

----------------------------------------------------------------------
 include/couch_db.hrl   |   2 -
 src/couch_key_tree.erl | 218 +++++++++++++++++++++++++-------------------
 2 files changed, 126 insertions(+), 94 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/050fea27/include/couch_db.hrl
----------------------------------------------------------------------
diff --git a/include/couch_db.hrl b/include/couch_db.hrl
index c57a583..570d3db 100644
--- a/include/couch_db.hrl
+++ b/include/couch_db.hrl
@@ -42,10 +42,8 @@
 -define(LOG_WARN(Format, Args), couch_log:warning(Format, Args)).
 -define(LOG_ERROR(Format, Args), couch_log:error(Format, Args)).
 
-% Tree::term() is really a tree(), but we don't want to require R13B04 yet
 -type branch() :: {Key::term(), Value::term(), Tree::term()}.
 -type path() :: {Start::pos_integer(), branch()}.
--type tree() :: [branch()]. % sorted by key
 
 -record(rev_info, {
     rev,

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/050fea27/src/couch_key_tree.erl
----------------------------------------------------------------------
diff --git a/src/couch_key_tree.erl b/src/couch_key_tree.erl
index bf7e61c..1659547 100644
--- a/src/couch_key_tree.erl
+++ b/src/couch_key_tree.erl
@@ -65,104 +65,138 @@ stem/2
 ]).
 
 -include_lib("couch/include/couch_db.hrl").
-
-%% @doc Merge a path with a list of paths and stem to the given length.
--spec merge([path()], path(), pos_integer()) -> {[path()],
-    conflicts | no_conflicts}.
-merge(Paths, Path, Depth) ->
-    {Merged, Conflicts} = merge(Paths, Path),
-    {stem(Merged, Depth), Conflicts}.
-
-%% @doc Merge a path with an existing list of paths, returning a new list of
-%% paths. A return of conflicts indicates a new conflict was discovered in this
-%% merge. Conflicts may already exist in the original list of paths.
--spec merge([path()], path()) -> {[path()], conflicts | no_conflicts}.
-merge(Paths, Path) ->
-    {ok, Merged, HasConflicts} = merge_one(Paths, Path, [], false),
-    if HasConflicts ->
-        Conflicts = conflicts;
-    (length(Merged) =/= length(Paths)) and (length(Merged) =/= 1) ->
-        Conflicts = conflicts;
-    true ->
-        Conflicts = no_conflicts
-    end,
-    {lists:sort(Merged), Conflicts}.
-
--spec merge_one(Original::[path()], Inserted::path(), [path()], boolean()) ->
-    {ok, Merged::[path()], NewConflicts::boolean()}.
-merge_one([], Insert, OutAcc, ConflictsAcc) ->
-    {ok, [Insert | OutAcc], ConflictsAcc};
-merge_one([{Start, Tree}|Rest], {StartInsert, TreeInsert}, Acc, HasConflicts) ->
-    case merge_at([Tree], StartInsert - Start, [TreeInsert]) of
-    {ok, [Merged], Conflicts} ->
-        MergedStart = lists:min([Start, StartInsert]),
-        {ok, Rest ++ [{MergedStart, Merged} | Acc], Conflicts or HasConflicts};
-    no ->
-        AccOut = [{Start, Tree} | Acc],
-        merge_one(Rest, {StartInsert, TreeInsert}, AccOut, HasConflicts)
+-type treenode() :: {Key::term(), Value::term(), [Node::treenode()]}.
+-type tree() :: {Depth::pos_integer(), [treenode()]}.
+-type revtree() :: [tree()].
+
+
+%% @doc Merge a path into the given tree and then stem the result.
+%% Although Tree is of type tree(), it must not contain any branches.
+-spec merge(revtree(), tree(), pos_integer()) ->
+                {revtree(), new_leaf | new_branch | internal_node}.
+merge(RevTree, Tree, StemDepth) ->
+    {Merged, Result} = merge(RevTree, Tree),
+    {stem(Merged, StemDepth), Result}.
+
+
+%% @doc Merge a path into a tree.
+-spec merge(revtree(), tree()) ->
+                {revtree(), new_leaf | new_branch | internal_node}.
+merge(RevTree, Tree) ->
+    {Merged, Result} = merge_tree(RevTree, Tree, []),
+    {lists:sort(Merged), Result}.
+
+%% @private
+%% @doc Attempt to merge Tree into each branch of the RevTree.
+%% If it can't find a branch that the new tree merges into, add it as a
+%% new branch in the RevTree.
+-spec merge_tree(revtree(), tree(), revtree()) -> {revtree(), boolean()}.
+merge_tree([], Tree, []) ->
+    {[Tree], new_leaf};
+merge_tree([], Tree, MergeAcc) ->
+    {[Tree|MergeAcc], new_branch};
+merge_tree([{Depth, Nodes} | Rest], {IDepth, INodes}=Tree, MergeAcc) ->
+    % For the intrepid observer following along at home, notice what we're
+    % doing here with (Depth - IDepth). This tells us which of the two
+    % branches (Nodes or INodes) we need to seek into. If Depth > IDepth
+    % that means we need go into INodes to find where we line up with
+    % Nodes. If Depth < IDepth, its obviously the other way. If it turns
+    % out that (Depth - IDepth) == 0, then we know that this is where
+    % we begin our actual merge operation (ie, looking for key matches).
+    % Its helpful to note that this whole moving into sub-branches is due
+    % to how we store trees that have been stemmed. When a path is
+    % stemmed so that the root node is lost, we wrap it in a tuple with
+    % the number keys that have been droped. This number is the depth
+    % value that's used throughout this module.
+    case merge_at([Nodes], Depth - IDepth, [INodes]) of
+        {[Merged], Result} ->
+            NewDepth = erlang:min(Depth, IDepth),
+            {Rest ++ [{NewDepth, Merged} | MergeAcc], Result};
+        fail ->
+            merge_tree(Rest, Tree, [{Depth, Nodes} | MergeAcc])
     end.
 
--spec merge_at(tree(), Place::integer(), tree()) ->
-    {ok, Merged::tree(), HasConflicts::boolean()} | no.
-merge_at(_Ours, _Place, []) ->
-    no;
-merge_at([], _Place, _Insert) ->
-    no;
-merge_at([{Key, Value, SubTree}|Sibs], Place, InsertTree) when Place > 0 ->
-    % inserted starts later than committed, need to drill into committed subtree
-    case merge_at(SubTree, Place - 1, InsertTree) of
-    {ok, Merged, Conflicts} ->
-        {ok, [{Key, Value, Merged} | Sibs], Conflicts};
-    no ->
-        % first branch didn't merge, move to next branch
-        case merge_at(Sibs, Place, InsertTree) of
-        {ok, Merged, Conflicts} ->
-            {ok, [{Key, Value, SubTree} | Merged], Conflicts};
-        no ->
-            no
-        end
+%% @private
+%% @doc Locate the point at which merging can start.
+%% Because of stemming we may need to seek into one of the branches
+%% before we can start comparing node keys. If one of the branches
+%% ends up running out of nodes we know that these two branches can
+%% not be merged.
+-spec merge_at([node()], integer(), [node()]) -> {revtree(), boolean()} | fail.
+merge_at(_Nodes, _Pos, []) ->
+    fail;
+merge_at([], _Pos, _INodes) ->
+    fail;
+merge_at(Nodes, Pos, [{IK, IV, [NextINode]}]) when Pos > 0 ->
+    % Depth was bigger than IDepth, so we need to discard from the
+    % insert path to find where it might start matching.
+    case merge_at(Nodes, Pos - 1, [NextINode]) of
+        {Merged, Result} -> {[{IK, IV, Merged}], Result};
+        fail -> fail
     end;
-merge_at(OurTree, Place, [{Key, Value, SubTree}]) when Place < 0 ->
-    % inserted starts earlier than committed, need to drill into insert subtree
-    case merge_at(OurTree, Place + 1, SubTree) of
-    {ok, Merged, Conflicts} ->
-        {ok, [{Key, Value, Merged}], Conflicts};
-    no ->
-        no
+merge_at([{K, V, SubTree} | Sibs], Pos, INodes) when Pos < 0 ->
+    % When Pos is negative, Depth was less than IDepth, so we
+    % need to discard from the revision tree path
+    case merge_at(SubTree, Pos + 1, INodes) of
+        {Merged, Result} ->
+            {[{K, V, Merged} | Sibs], Result};
+        fail ->
+            % Merging along the subtree failed. We need to also try
+            % merging the insert branch against the siblings of this
+            % node.
+            case merge_at(Sibs, Pos, INodes) of
+                {Merged, Result} -> {[{K, V, SubTree} | Merged], Result};
+                fail -> fail
+            end
     end;
-merge_at([{Key, V1, SubTree}|Sibs], 0, [{Key, V2, InsertSubTree}]) ->
-    {Merged, Conflicts} = merge_simple(SubTree, InsertSubTree),
-    {ok, [{Key, value_pref(V1, V2), Merged} | Sibs], Conflicts};
-merge_at([{OurKey, _, _} | _], 0, [{Key, _, _}]) when OurKey > Key ->
-    % siblings keys are ordered, no point in continuing
-    no;
-merge_at([Tree | Sibs], 0, InsertTree) ->
-    case merge_at(Sibs, 0, InsertTree) of
-    {ok, Merged, Conflicts} ->
-        {ok, [Tree | Merged], Conflicts};
-    no ->
-        no
+merge_at([{K, V1, Nodes} | Sibs], 0, [{K, V2, INodes}]) ->
+    % Keys are equal. At this point we have found a possible starting
+    % position for our merge to take place.
+    {Merged, Result} = merge_extend(Nodes, INodes),
+    {[{K, value_pref(V1, V2), Merged} | Sibs], Result};
+merge_at([{K1, _, _} | _], 0, [{K2, _, _}]) when K1 > K2 ->
+    % Siblings keys are ordered, no point in continuing
+    fail;
+merge_at([Tree | Sibs], 0, INodes) ->
+    % INodes key comes after this key, so move on to the next sibling.
+    case merge_at(Sibs, 0, INodes) of
+        {Merged, Result} -> {[Tree | Merged], Result};
+        fail -> fail
     end.
 
-% key tree functions
-
--spec merge_simple(tree(), tree()) -> {Merged::tree(), NewConflicts::boolean()}.
-merge_simple([], B) ->
-    {B, false};
-merge_simple(A, []) ->
-    {A, false};
-merge_simple([{Key, V1, SubA} | NextA], [{Key, V2, SubB} | NextB]) ->
-    {MergedSubTree, Conflict1} = merge_simple(SubA, SubB),
-    {MergedNextTree, Conflict2} = merge_simple(NextA, NextB),
-    Value = value_pref(V1, V2),
-    {[{Key, Value, MergedSubTree} | MergedNextTree], Conflict1 or Conflict2};
-merge_simple([{A, _, _} = Tree | Next], [{B, _, _} | _] = Insert) when A < B ->
-    {Merged, Conflict} = merge_simple(Next, Insert),
-    % if Merged has more branches than the input we added a new conflict
-    {[Tree | Merged], Conflict orelse (length(Merged) > length(Next))};
-merge_simple(Ours, [Tree | Next]) ->
-    {Merged, Conflict} = merge_simple(Ours, Next),
-    {[Tree | Merged], Conflict orelse (length(Merged) > length(Next))}.
+-spec merge_extend(tree(), tree()) -> {tree(), atom()}.
+merge_extend([], B) when B =/= [] ->
+    % Most likely the insert branch simply extends this one, so the new
+    % branch is exactly B. Its also possible that B is a branch because
+    % its key sorts greater than all siblings of an internal node. This
+    % condition is checked in the last clause of this function and the
+    % new_leaf result is fixed to be new_branch.
+    {B, new_leaf};
+merge_extend(A, []) ->
+    % Insert branch ends an internal node in our original revtree()
+    % so the end result is exactly our original revtree.
+    {A, internal_node};
+merge_extend([{K, V1, SubA} | NextA], [{K, V2, SubB}]) ->
+    % Here we're simply extending the path to the next deeper
+    % level in the two branches.
+    {Merged, Result} = merge_extend(SubA, SubB),
+    {[{K, value_pref(V1, V2), Merged} | NextA], Result};
+merge_extend([{K1, _, _}=NodeA | Rest], [{K2, _, _}=NodeB]) when K1 > K2 ->
+    % Keys are ordered so we know this is where the insert branch needs
+    % to be inserted into the tree. We also know that this creates a new
+    % branch so we have a new leaf to report.
+    {[NodeB, NodeA | Rest], new_branch};
+merge_extend([Tree | RestA], NextB) ->
+    % Here we're moving on to the next sibling to try and extend our
+    % merge even deeper. The length check is due to the fact that the
+    % key in NextB might be larger than the largest key in RestA which
+    % means we've created a new branch.
+    {Merged, Result0} = merge_extend(RestA, NextB),
+    Result = case length(Merged) == length(RestA) of
+        true -> Result0;
+        false -> new_branch
+    end,
+    {[Tree | Merged], Result}.
 
 find_missing(_Tree, []) ->
     [];


[3/4] couch commit: updated refs/heads/windsor-merge-325 to d75dca1

Posted by rn...@apache.org.
Rewrite merge_rev_trees to handle new merge output

This commit adds a complete rewrite of couch_db_updater:merge_rev_trees
to support the new couch_key_tree:merge output. Since
couch_key_tree:merge now returns the position of the merged path in the
tree, merge_rev_trees can explicitly check if a document is being
recreated and behave accordingly.

BugzID: 25150

davisp needs to check this one.


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/d3937f59
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/d3937f59
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/d3937f59

Branch: refs/heads/windsor-merge-325
Commit: d3937f59a0c2f66631723fd542f83b68a7a149f7
Parents: 68a1012
Author: Paul J. Davis <pa...@gmail.com>
Authored: Fri Nov 15 13:17:15 2013 -0600
Committer: Robert Newson <rn...@apache.org>
Committed: Wed Aug 6 17:19:15 2014 +0100

----------------------------------------------------------------------
 src/couch_db_updater.erl | 181 ++++++++++++++++++++++++------------------
 1 file changed, 104 insertions(+), 77 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/d3937f59/src/couch_db_updater.erl
----------------------------------------------------------------------
diff --git a/src/couch_db_updater.erl b/src/couch_db_updater.erl
index 3ff5963..4e2a0d6 100644
--- a/src/couch_db_updater.erl
+++ b/src/couch_db_updater.erl
@@ -629,90 +629,115 @@ merge_rev_trees(_Limit, _Merge, [], [], AccNewInfos, AccRemoveSeqs, AccSeq) ->
     {ok, lists:reverse(AccNewInfos), AccRemoveSeqs, AccSeq};
 merge_rev_trees(Limit, MergeConflicts, [NewDocs|RestDocsList],
         [OldDocInfo|RestOldInfo], AccNewInfos, AccRemoveSeqs, AccSeq) ->
-    #full_doc_info{id=Id,rev_tree=OldTree,deleted=OldDeleted0,update_seq=OldSeq}
-            = OldDocInfo,
-    {NewRevTree, _} = lists:foldl(
-        fun({Client, #doc{revs={Pos,[_Rev|PrevRevs]}}=NewDoc}, {AccTree, OldDeleted}) ->
-            if not MergeConflicts ->
-                case couch_key_tree:merge(AccTree, couch_doc:to_path(NewDoc),
-                    Limit) of
-                {_NewTree, conflicts} when (not OldDeleted) ->
-                    send_result(Client, NewDoc, conflict),
-                    {AccTree, OldDeleted};
-                {NewTree, conflicts} when PrevRevs /= [] ->
-                    % Check to be sure if prev revision was specified, it's
-                    % a leaf node in the tree
-                    Leafs = couch_key_tree:get_all_leafs(AccTree),
-                    IsPrevLeaf = lists:any(fun({_, {LeafPos, [LeafRevId|_]}}) ->
-                            {LeafPos, LeafRevId} == {Pos-1, hd(PrevRevs)}
-                        end, Leafs),
-                    if IsPrevLeaf ->
-                        {NewTree, OldDeleted};
-                    true ->
-                        send_result(Client, NewDoc, conflict),
-                        {AccTree, OldDeleted}
-                    end;
-                {NewTree, no_conflicts} when  AccTree == NewTree ->
-                    % the tree didn't change at all
-                    % meaning we are saving a rev that's already
-                    % been editted again.
-                    if (Pos == 1) and OldDeleted ->
-                        % this means we are recreating a brand new document
-                        % into a state that already existed before.
-                        % put the rev into a subsequent edit of the deletion
-                        #doc_info{revs=[#rev_info{rev={OldPos,OldRev}}|_]} =
-                                couch_doc:to_doc_info(OldDocInfo),
-                        NewRevId = couch_db:new_revid(
-                                NewDoc#doc{revs={OldPos, [OldRev]}}),
-                        NewDoc2 = NewDoc#doc{revs={OldPos + 1, [NewRevId, OldRev]}},
-                        {NewTree2, _} = couch_key_tree:merge(AccTree,
-                                couch_doc:to_path(NewDoc2), Limit),
-                        % we changed the rev id, this tells the caller we did
-                        send_result(Client, NewDoc,
-                                {ok, {OldPos + 1, NewRevId}}),
-                        {NewTree2, OldDeleted};
-                    true ->
-                        send_result(Client, NewDoc, conflict),
-                        {AccTree, OldDeleted}
-                    end;
-                {NewTree, _} ->
-                    {NewTree, NewDoc#doc.deleted}
-                end;
-            true ->
-                {NewTree, _} = couch_key_tree:merge(AccTree,
-                            couch_doc:to_path(NewDoc), Limit),
-                {NewTree, OldDeleted}
-            end
-        end,
-        {OldTree, OldDeleted0}, NewDocs),
-    if NewRevTree == OldTree ->
+    NewDocInfo0 = lists:foldl(fun({Client, NewDoc}, OldInfoAcc) ->
+        merge_rev_tree(OldInfoAcc, NewDoc, Client, Limit, MergeConflicts)
+    end, OldDocInfo, NewDocs),
+    % When MergeConflicts is false, we updated #full_doc_info.deleted on every
+    % iteration of merge_rev_tree. However, merge_rev_tree does not update
+    % #full_doc_info.deleted when MergeConflicts is true, since we don't need
+    % to know whether the doc is deleted between iterations. Since we still
+    % need to know if the doc is deleted after the merge happens, we have to
+    % set it here.
+    NewDocInfo1 = case MergeConflicts of
+        true ->
+            NewDocInfo0#full_doc_info{
+                deleted = couch_doc:is_deleted(NewDocInfo0)
+            };
+        false ->
+            NewDocInfo0
+    end,
+    if NewDocInfo1 == OldDocInfo ->
         % nothing changed
         merge_rev_trees(Limit, MergeConflicts, RestDocsList, RestOldInfo,
             AccNewInfos, AccRemoveSeqs, AccSeq);
     true ->
-        % we have updated the document, give it a new seq #
-        NewInfo = #full_doc_info{id=Id,update_seq=AccSeq+1,rev_tree=NewRevTree},
+        % We have updated the document, give it a new update_seq. Its
+        % important to note that the update_seq on OldDocInfo should
+        % be identical to the value on NewDocInfo1.
+        OldSeq = OldDocInfo#full_doc_info.update_seq,
+        NewDocInfo2 = NewDocInfo1#full_doc_info{
+            update_seq = AccSeq + 1
+        },
         RemoveSeqs = case OldSeq of
             0 -> AccRemoveSeqs;
             _ -> [OldSeq | AccRemoveSeqs]
         end,
         merge_rev_trees(Limit, MergeConflicts, RestDocsList, RestOldInfo,
-            [NewInfo|AccNewInfos], RemoveSeqs, AccSeq+1)
+            [NewDocInfo2|AccNewInfos], RemoveSeqs, AccSeq+1)
     end.
 
-
-
-new_index_entries([], AccById, AccDDocIds) ->
-    {AccById, AccDDocIds};
-new_index_entries([#full_doc_info{id=Id}=Info | Rest], AccById, AccDDocIds) ->
-    #doc_info{revs=[#rev_info{deleted=Del}|_]} = couch_doc:to_doc_info(Info),
-    AccById2 = [Info#full_doc_info{deleted=Del} | AccById],
-    AccDDocIds2 = case Id of
-        <<?DESIGN_DOC_PREFIX, _/binary>> -> [Id | AccDDocIds];
-        _ -> AccDDocIds
-    end,
-    new_index_entries(Rest, AccById2, AccDDocIds2).
-
+merge_rev_tree(OldInfo, NewDoc, Client, Limit, false)
+        when OldInfo#full_doc_info.deleted ->
+    % We're recreating a document that was previously
+    % deleted. To check that this is a recreation from
+    % the root we assert that the new document has a
+    % revision depth of 1 (this is to avoid recreating a
+    % doc from a previous internal revision) and is also
+    % not deleted. To avoid expanding the revision tree
+    % unnecessarily we create a new revision based on
+    % the winning deleted revision.
+
+    {RevDepth, _} = NewDoc#doc.revs,
+    NewDeleted = NewDoc#doc.deleted,
+    case RevDepth == 1 andalso not NewDeleted of
+        true ->
+            % Update the new doc based on revisions in OldInfo
+            #doc_info{revs=[WinningRev | _]} = couch_doc:to_doc_info(OldInfo),
+            #rev_info{rev={OldPos, OldRev}} = WinningRev,
+            NewRevId = couch_db:new_revid(NewDoc#doc{revs={OldPos, [OldRev]}}),
+            NewDoc2 = NewDoc#doc{revs={OldPos + 1, [NewRevId, OldRev]}},
+
+            % Merge our modified new doc into the tree
+            #full_doc_info{rev_tree=OldTree} = OldInfo,
+            NewTree0 = couch_db:doc_to_tree(NewDoc2),
+            case couch_key_tree:merge(OldTree, NewTree0, Limit) of
+                {NewTree1, new_leaf} ->
+                    % We changed the revision id so inform the caller
+                    send_result(Client, NewDoc, {ok, {OldPos+1, NewRevId}}),
+                    OldInfo#full_doc_info{
+                        rev_tree = NewTree1,
+                        deleted = false
+                    };
+                _ ->
+                    throw(doc_recreation_failed)
+            end;
+        _ ->
+            send_result(Client, NewDoc, conflict),
+            OldInfo
+    end;
+merge_rev_tree(OldInfo, NewDoc, Client, Limit, false) ->
+    % We're attempting to merge a new revision into an
+    % undeleted document. To not be a conflict we require
+    % that the merge results in extending a branch.
+
+    OldTree = OldInfo#full_doc_info.rev_tree,
+    NewTree0 = couch_db:doc_to_tree(NewDoc),
+    NewDeleted = NewDoc#doc.deleted,
+    case couch_key_tree:merge(OldTree, NewTree0, Limit) of
+        {NewTree, new_leaf} when not NewDeleted ->
+            OldInfo#full_doc_info{
+                rev_tree = NewTree,
+                deleted = false
+            };
+        {NewTree, new_leaf} when NewDeleted ->
+            % We have to check if we just deleted this
+            % document completely or if it was a conflict
+            % resolution.
+            OldInfo#full_doc_info{
+                rev_tree = NewTree,
+                deleted = couch_doc:is_deleted(NewTree)
+            };
+        _ ->
+            send_result(Client, NewDoc, conflict),
+            OldInfo
+    end;
+merge_rev_tree(OldInfo, NewDoc, _Client, Limit, true) ->
+    % We're merging in revisions without caring about
+    % conflicts. Most likely this is a replication update.
+    OldTree = OldInfo#full_doc_info.rev_tree,
+    NewTree0 = couch_db:doc_to_tree(NewDoc),
+    {NewTree, _} = couch_key_tree:merge(OldTree, NewTree0, Limit),
+    OldInfo#full_doc_info{rev_tree = NewTree}.
 
 stem_full_doc_infos(#db{revs_limit=Limit}, DocInfos) ->
     [Info#full_doc_info{rev_tree=couch_key_tree:stem(Tree, Limit)} ||
@@ -745,10 +770,7 @@ update_docs_int(Db, DocsList, NonRepDocs, MergeConflicts, FullCommit) ->
 
     % Write out the document summaries (the bodies are stored in the nodes of
     % the trees, the attachments are already written to disk)
-    {ok, FlushedFullDocInfos} = flush_trees(Db2, NewFullDocInfos, []),
-
-    {IndexFullDocInfos, UpdatedDDocIds} =
-            new_index_entries(FlushedFullDocInfos, [], []),
+    {ok, IndexFullDocInfos} = flush_trees(Db2, NewFullDocInfos, []),
 
     % and the indexes
     {ok, DocInfoByIdBTree2} = couch_btree:add_remove(DocInfoByIdBTree, IndexFullDocInfos, []),
@@ -761,6 +783,11 @@ update_docs_int(Db, DocsList, NonRepDocs, MergeConflicts, FullCommit) ->
 
     % Check if we just updated any design documents, and update the validation
     % funs if we did.
+    UpdatedDDocIds = lists:flatmap(fun
+        (<<"_design/", _/binary>> = Id) -> [Id];
+        (_) -> []
+    end, Ids),
+
     Db4 = case length(UpdatedDDocIds) > 0 of
         true ->
             couch_event:notify(Db3#db.name, ddoc_updated),


[2/4] couch commit: updated refs/heads/windsor-merge-325 to d75dca1

Posted by rn...@apache.org.
Add couch_doc:is_deleted/1

This adds a helper function for checking if a \#full_doc_info is
deleted. It's more effecient than checking the first \#rev_info in the
result of couch_doc:to_doc_info/1 because it simply checks if any leaf
is deleted rather than sorting the leaves based on deleted status.

BugzId: 25150


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/68a10128
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/68a10128
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/68a10128

Branch: refs/heads/windsor-merge-325
Commit: 68a1012801b59ff3f33eba30fabc96cfefa8bb2b
Parents: 050fea2
Author: Benjamin Bastian <be...@gmail.com>
Authored: Thu Dec 12 13:23:49 2013 -0800
Committer: Robert Newson <rn...@apache.org>
Committed: Wed Aug 6 15:21:37 2014 +0100

----------------------------------------------------------------------
 src/couch_doc.erl | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/68a10128/src/couch_doc.erl
----------------------------------------------------------------------
diff --git a/src/couch_doc.erl b/src/couch_doc.erl
index 9263ba3..d82d626 100644
--- a/src/couch_doc.erl
+++ b/src/couch_doc.erl
@@ -22,6 +22,7 @@
 -export([to_path/1]).
 -export([mp_parse_doc/2]).
 -export([with_ejson_body/1]).
+-export([is_deleted/1]).
 
 -include_lib("couch/include/couch_db.hrl").
 
@@ -367,6 +368,23 @@ to_doc_info_path(#full_doc_info{id=Id,rev_tree=Tree,update_seq=FDISeq}) ->
     {#doc_info{id=Id, high_seq=max_seq(Tree, FDISeq), revs=RevInfos}, WinPath}.
 
 
+is_deleted(#full_doc_info{rev_tree=Tree}) ->
+    is_deleted(Tree);
+is_deleted(Tree) ->
+    Leafs = couch_key_tree:get_all_leafs(Tree),
+    try
+        lists:foldl(fun
+            ({#leaf{deleted=false},_}, _) ->
+                throw(not_deleted);
+            ({#doc{deleted=false},_}, _) ->
+                throw(not_deleted);
+            (_, Acc) ->
+                Acc
+        end, nil, Leafs),
+        true
+    catch throw:not_deleted ->
+        false
+    end.
 
 
 att_foldl(#att{data=Bin}, Fun, Acc) when is_binary(Bin) ->


[4/4] couch commit: updated refs/heads/windsor-merge-325 to d75dca1

Posted by rn...@apache.org.
Update old function specs to be correct


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/d75dca18
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/d75dca18
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/d75dca18

Branch: refs/heads/windsor-merge-325
Commit: d75dca18e3fe1914381f071edf65e5b9b1e7aab3
Parents: d3937f5
Author: Benjamin Bastian <be...@gmail.com>
Authored: Thu Jan 23 14:27:53 2014 -0800
Committer: Robert Newson <rn...@apache.org>
Committed: Wed Aug 6 17:36:36 2014 +0100

----------------------------------------------------------------------
 src/couch_key_tree.erl | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/d75dca18/src/couch_key_tree.erl
----------------------------------------------------------------------
diff --git a/src/couch_key_tree.erl b/src/couch_key_tree.erl
index 1659547..9ad6f7f 100644
--- a/src/couch_key_tree.erl
+++ b/src/couch_key_tree.erl
@@ -90,7 +90,8 @@ merge(RevTree, Tree) ->
 %% @doc Attempt to merge Tree into each branch of the RevTree.
 %% If it can't find a branch that the new tree merges into, add it as a
 %% new branch in the RevTree.
--spec merge_tree(revtree(), tree(), revtree()) -> {revtree(), boolean()}.
+-spec merge_tree(revtree(), tree(), revtree()) ->
+                {revtree(), new_leaf | new_branch | internal_node}.
 merge_tree([], Tree, []) ->
     {[Tree], new_leaf};
 merge_tree([], Tree, MergeAcc) ->
@@ -122,7 +123,8 @@ merge_tree([{Depth, Nodes} | Rest], {IDepth, INodes}=Tree, MergeAcc) ->
 %% before we can start comparing node keys. If one of the branches
 %% ends up running out of nodes we know that these two branches can
 %% not be merged.
--spec merge_at([node()], integer(), [node()]) -> {revtree(), boolean()} | fail.
+-spec merge_at([node()], integer(), [node()]) ->
+                {revtree(), new_leaf | new_branch | internal_node} | fail.
 merge_at(_Nodes, _Pos, []) ->
     fail;
 merge_at([], _Pos, _INodes) ->
@@ -164,7 +166,8 @@ merge_at([Tree | Sibs], 0, INodes) ->
         fail -> fail
     end.
 
--spec merge_extend(tree(), tree()) -> {tree(), atom()}.
+-spec merge_extend(tree(), tree()) ->
+                {tree(), new_leaf | new_branch | internal_node}.
 merge_extend([], B) when B =/= [] ->
     % Most likely the insert branch simply extends this one, so the new
     % branch is exactly B. Its also possible that B is a branch because