You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by rn...@apache.org on 2014/08/06 18:57:00 UTC
[3/4] couch commit: updated refs/heads/windsor-merge-325 to d75dca1
Rewrite merge_rev_trees to handle new merge output
This commit adds a complete rewrite of couch_db_updater:merge_rev_trees
to support the new couch_key_tree:merge output. Since
couch_key_tree:merge now returns the position of the merged path in the
tree, merge_rev_trees can explicitly check if a document is being
recreated and behave accordingly.
BugzID: 25150
davisp needs to check this one.
Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/d3937f59
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/d3937f59
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/d3937f59
Branch: refs/heads/windsor-merge-325
Commit: d3937f59a0c2f66631723fd542f83b68a7a149f7
Parents: 68a1012
Author: Paul J. Davis <pa...@gmail.com>
Authored: Fri Nov 15 13:17:15 2013 -0600
Committer: Robert Newson <rn...@apache.org>
Committed: Wed Aug 6 17:19:15 2014 +0100
----------------------------------------------------------------------
src/couch_db_updater.erl | 181 ++++++++++++++++++++++++------------------
1 file changed, 104 insertions(+), 77 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/d3937f59/src/couch_db_updater.erl
----------------------------------------------------------------------
diff --git a/src/couch_db_updater.erl b/src/couch_db_updater.erl
index 3ff5963..4e2a0d6 100644
--- a/src/couch_db_updater.erl
+++ b/src/couch_db_updater.erl
@@ -629,90 +629,115 @@ merge_rev_trees(_Limit, _Merge, [], [], AccNewInfos, AccRemoveSeqs, AccSeq) ->
{ok, lists:reverse(AccNewInfos), AccRemoveSeqs, AccSeq};
merge_rev_trees(Limit, MergeConflicts, [NewDocs|RestDocsList],
[OldDocInfo|RestOldInfo], AccNewInfos, AccRemoveSeqs, AccSeq) ->
- #full_doc_info{id=Id,rev_tree=OldTree,deleted=OldDeleted0,update_seq=OldSeq}
- = OldDocInfo,
- {NewRevTree, _} = lists:foldl(
- fun({Client, #doc{revs={Pos,[_Rev|PrevRevs]}}=NewDoc}, {AccTree, OldDeleted}) ->
- if not MergeConflicts ->
- case couch_key_tree:merge(AccTree, couch_doc:to_path(NewDoc),
- Limit) of
- {_NewTree, conflicts} when (not OldDeleted) ->
- send_result(Client, NewDoc, conflict),
- {AccTree, OldDeleted};
- {NewTree, conflicts} when PrevRevs /= [] ->
- % Check to be sure if prev revision was specified, it's
- % a leaf node in the tree
- Leafs = couch_key_tree:get_all_leafs(AccTree),
- IsPrevLeaf = lists:any(fun({_, {LeafPos, [LeafRevId|_]}}) ->
- {LeafPos, LeafRevId} == {Pos-1, hd(PrevRevs)}
- end, Leafs),
- if IsPrevLeaf ->
- {NewTree, OldDeleted};
- true ->
- send_result(Client, NewDoc, conflict),
- {AccTree, OldDeleted}
- end;
- {NewTree, no_conflicts} when AccTree == NewTree ->
- % the tree didn't change at all
- % meaning we are saving a rev that's already
- % been editted again.
- if (Pos == 1) and OldDeleted ->
- % this means we are recreating a brand new document
- % into a state that already existed before.
- % put the rev into a subsequent edit of the deletion
- #doc_info{revs=[#rev_info{rev={OldPos,OldRev}}|_]} =
- couch_doc:to_doc_info(OldDocInfo),
- NewRevId = couch_db:new_revid(
- NewDoc#doc{revs={OldPos, [OldRev]}}),
- NewDoc2 = NewDoc#doc{revs={OldPos + 1, [NewRevId, OldRev]}},
- {NewTree2, _} = couch_key_tree:merge(AccTree,
- couch_doc:to_path(NewDoc2), Limit),
- % we changed the rev id, this tells the caller we did
- send_result(Client, NewDoc,
- {ok, {OldPos + 1, NewRevId}}),
- {NewTree2, OldDeleted};
- true ->
- send_result(Client, NewDoc, conflict),
- {AccTree, OldDeleted}
- end;
- {NewTree, _} ->
- {NewTree, NewDoc#doc.deleted}
- end;
- true ->
- {NewTree, _} = couch_key_tree:merge(AccTree,
- couch_doc:to_path(NewDoc), Limit),
- {NewTree, OldDeleted}
- end
- end,
- {OldTree, OldDeleted0}, NewDocs),
- if NewRevTree == OldTree ->
+ NewDocInfo0 = lists:foldl(fun({Client, NewDoc}, OldInfoAcc) ->
+ merge_rev_tree(OldInfoAcc, NewDoc, Client, Limit, MergeConflicts)
+ end, OldDocInfo, NewDocs),
+ % When MergeConflicts is false, we update #full_doc_info.deleted on every
+ % iteration of merge_rev_tree. However, merge_rev_tree does not update
+ % #full_doc_info.deleted when MergeConflicts is true, since we don't need
+ % to know whether the doc is deleted between iterations. Since we still
+ % need to know if the doc is deleted after the merge happens, we have to
+ % set it here.
+ NewDocInfo1 = case MergeConflicts of
+ true ->
+ NewDocInfo0#full_doc_info{
+ deleted = couch_doc:is_deleted(NewDocInfo0)
+ };
+ false ->
+ NewDocInfo0
+ end,
+ if NewDocInfo1 == OldDocInfo ->
% nothing changed
merge_rev_trees(Limit, MergeConflicts, RestDocsList, RestOldInfo,
AccNewInfos, AccRemoveSeqs, AccSeq);
true ->
- % we have updated the document, give it a new seq #
- NewInfo = #full_doc_info{id=Id,update_seq=AccSeq+1,rev_tree=NewRevTree},
+ % We have updated the document, give it a new update_seq. It's
+ % important to note that the update_seq on OldDocInfo should
+ % be identical to the value on NewDocInfo1.
+ OldSeq = OldDocInfo#full_doc_info.update_seq,
+ NewDocInfo2 = NewDocInfo1#full_doc_info{
+ update_seq = AccSeq + 1
+ },
RemoveSeqs = case OldSeq of
0 -> AccRemoveSeqs;
_ -> [OldSeq | AccRemoveSeqs]
end,
merge_rev_trees(Limit, MergeConflicts, RestDocsList, RestOldInfo,
- [NewInfo|AccNewInfos], RemoveSeqs, AccSeq+1)
+ [NewDocInfo2|AccNewInfos], RemoveSeqs, AccSeq+1)
end.
-
-
-new_index_entries([], AccById, AccDDocIds) ->
- {AccById, AccDDocIds};
-new_index_entries([#full_doc_info{id=Id}=Info | Rest], AccById, AccDDocIds) ->
- #doc_info{revs=[#rev_info{deleted=Del}|_]} = couch_doc:to_doc_info(Info),
- AccById2 = [Info#full_doc_info{deleted=Del} | AccById],
- AccDDocIds2 = case Id of
- <<?DESIGN_DOC_PREFIX, _/binary>> -> [Id | AccDDocIds];
- _ -> AccDDocIds
- end,
- new_index_entries(Rest, AccById2, AccDDocIds2).
-
+merge_rev_tree(OldInfo, NewDoc, Client, Limit, false)
+ when OldInfo#full_doc_info.deleted ->
+ % We're recreating a document that was previously
+ % deleted. To check that this is a recreation from
+ % the root we assert that the new document has a
+ % revision depth of 1 (this is to avoid recreating a
+ % doc from a previous internal revision) and is also
+ % not deleted. To avoid expanding the revision tree
+ % unnecessarily we create a new revision based on
+ % the winning deleted revision.
+
+ {RevDepth, _} = NewDoc#doc.revs,
+ NewDeleted = NewDoc#doc.deleted,
+ case RevDepth == 1 andalso not NewDeleted of
+ true ->
+ % Update the new doc based on revisions in OldInfo
+ #doc_info{revs=[WinningRev | _]} = couch_doc:to_doc_info(OldInfo),
+ #rev_info{rev={OldPos, OldRev}} = WinningRev,
+ NewRevId = couch_db:new_revid(NewDoc#doc{revs={OldPos, [OldRev]}}),
+ NewDoc2 = NewDoc#doc{revs={OldPos + 1, [NewRevId, OldRev]}},
+
+ % Merge our modified new doc into the tree
+ #full_doc_info{rev_tree=OldTree} = OldInfo,
+ NewTree0 = couch_db:doc_to_tree(NewDoc2),
+ case couch_key_tree:merge(OldTree, NewTree0, Limit) of
+ {NewTree1, new_leaf} ->
+ % We changed the revision id so inform the caller
+ send_result(Client, NewDoc, {ok, {OldPos+1, NewRevId}}),
+ OldInfo#full_doc_info{
+ rev_tree = NewTree1,
+ deleted = false
+ };
+ _ ->
+ throw(doc_recreation_failed)
+ end;
+ _ ->
+ send_result(Client, NewDoc, conflict),
+ OldInfo
+ end;
+merge_rev_tree(OldInfo, NewDoc, Client, Limit, false) ->
+ % We're attempting to merge a new revision into an
+ % undeleted document. To not be a conflict we require
+ % that the merge results in extending a branch.
+
+ OldTree = OldInfo#full_doc_info.rev_tree,
+ NewTree0 = couch_db:doc_to_tree(NewDoc),
+ NewDeleted = NewDoc#doc.deleted,
+ case couch_key_tree:merge(OldTree, NewTree0, Limit) of
+ {NewTree, new_leaf} when not NewDeleted ->
+ OldInfo#full_doc_info{
+ rev_tree = NewTree,
+ deleted = false
+ };
+ {NewTree, new_leaf} when NewDeleted ->
+ % We have to check if we just deleted this
+ % document completely or if it was a conflict
+ % resolution.
+ OldInfo#full_doc_info{
+ rev_tree = NewTree,
+ deleted = couch_doc:is_deleted(NewTree)
+ };
+ _ ->
+ send_result(Client, NewDoc, conflict),
+ OldInfo
+ end;
+merge_rev_tree(OldInfo, NewDoc, _Client, Limit, true) ->
+ % We're merging in revisions without caring about
+ % conflicts. Most likely this is a replication update.
+ OldTree = OldInfo#full_doc_info.rev_tree,
+ NewTree0 = couch_db:doc_to_tree(NewDoc),
+ {NewTree, _} = couch_key_tree:merge(OldTree, NewTree0, Limit),
+ OldInfo#full_doc_info{rev_tree = NewTree}.
stem_full_doc_infos(#db{revs_limit=Limit}, DocInfos) ->
[Info#full_doc_info{rev_tree=couch_key_tree:stem(Tree, Limit)} ||
@@ -745,10 +770,7 @@ update_docs_int(Db, DocsList, NonRepDocs, MergeConflicts, FullCommit) ->
% Write out the document summaries (the bodies are stored in the nodes of
% the trees, the attachments are already written to disk)
- {ok, FlushedFullDocInfos} = flush_trees(Db2, NewFullDocInfos, []),
-
- {IndexFullDocInfos, UpdatedDDocIds} =
- new_index_entries(FlushedFullDocInfos, [], []),
+ {ok, IndexFullDocInfos} = flush_trees(Db2, NewFullDocInfos, []),
% and the indexes
{ok, DocInfoByIdBTree2} = couch_btree:add_remove(DocInfoByIdBTree, IndexFullDocInfos, []),
@@ -761,6 +783,11 @@ update_docs_int(Db, DocsList, NonRepDocs, MergeConflicts, FullCommit) ->
% Check if we just updated any design documents, and update the validation
% funs if we did.
+ UpdatedDDocIds = lists:flatmap(fun
+ (<<"_design/", _/binary>> = Id) -> [Id];
+ (_) -> []
+ end, Ids),
+
Db4 = case length(UpdatedDDocIds) > 0 of
true ->
couch_event:notify(Db3#db.name, ddoc_updated),