You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by ko...@apache.org on 2017/03/01 16:38:41 UTC

[43/50] fabric commit: updated refs/heads/2971-count-distinct to 5d18415

In open_revs, do not count errors in quorum threshold calculation

Previously quorum check looked just at the number of replies and decided quorum
was met even if it only received errors. For example, if 2 nodes are in
maintance mode it might receive this sequence of replies:

  `rexi_EXIT, rexi_EXIT, ok`

In that case after the first two it would decide quorum (r=2) was met, return
what it had so far ([]) and kill the remaining worker, who was about to return
a valid revision.

The fix is to keep track of error replies and subtract them when deciding if
quorum was met.

COUCHDB-3271


Project: http://git-wip-us.apache.org/repos/asf/couchdb-fabric/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-fabric/commit/ec223519
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-fabric/tree/ec223519
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-fabric/diff/ec223519

Branch: refs/heads/2971-count-distinct
Commit: ec2235196d7195afab59cedc2d61a02b11596ab4
Parents: 6fa8d3c
Author: Nick Vatamaniuc <va...@apache.org>
Authored: Thu Jan 12 13:50:20 2017 -0500
Committer: Nick Vatamaniuc <va...@apache.org>
Committed: Fri Jan 20 10:56:14 2017 -0500

----------------------------------------------------------------------
 src/fabric_doc_open_revs.erl | 30 ++++++++++++++++++++++++++----
 1 file changed, 26 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-fabric/blob/ec223519/src/fabric_doc_open_revs.erl
----------------------------------------------------------------------
diff --git a/src/fabric_doc_open_revs.erl b/src/fabric_doc_open_revs.erl
index 573cda7..5393f92 100644
--- a/src/fabric_doc_open_revs.erl
+++ b/src/fabric_doc_open_revs.erl
@@ -24,6 +24,7 @@
     worker_count,
     workers,
     reply_count = 0,
+    reply_error_count = 0,
     r,
     revs,
     latest,
@@ -60,13 +61,15 @@ go(DbName, Id, Revs, Options) ->
 
 handle_message({rexi_DOWN, _, {_,NodeRef},_}, _Worker, #state{workers=Workers}=State) ->
     NewState = State#state{
-        workers = lists:keydelete(NodeRef, #shard.node, Workers)
+        workers = lists:keydelete(NodeRef, #shard.node, Workers),
+        reply_error_count = State#state.reply_error_count + 1
     },
     handle_message({ok, []}, nil, NewState);
 
 handle_message({rexi_EXIT, _}, Worker, #state{workers=Workers}=State) ->
     NewState = State#state{
-        workers = lists:delete(Worker, Workers)
+        workers = lists:delete(Worker, Workers),
+        reply_error_count = State#state.reply_error_count + 1
     },
     handle_message({ok, []}, nil, NewState);
 
@@ -80,18 +83,22 @@ handle_message({ok, RawReplies}, Worker, State) ->
         r = R,
         revs = Revs,
         latest = Latest,
-        repair = InRepair
+        repair = InRepair,
+        reply_error_count = ReplyErrorCount
     } = State,
 
     IsTree = Revs == all orelse Latest,
 
+    % Do not count error replies when checking quorum
+    QuorumReplies = ReplyCount + 1 - ReplyErrorCount >= R,
+
     {NewReplies, QuorumMet, Repair} = case IsTree of
         true ->
             {NewReplies0, AllInternal, Repair0} =
                     tree_replies(PrevReplies, tree_sort(RawReplies)),
             NumLeafs = couch_key_tree:count_leafs(PrevReplies),
             SameNumRevs = length(RawReplies) == NumLeafs,
-            QMet = AllInternal andalso SameNumRevs andalso ReplyCount + 1 >= R,
+            QMet = AllInternal andalso SameNumRevs andalso QuorumReplies,
             {NewReplies0, QMet, Repair0};
         false ->
             {NewReplies0, MinCount} = dict_replies(PrevReplies, RawReplies),
@@ -306,6 +313,7 @@ open_doc_revs_test_() ->
             check_ancestor_counted_in_quorum(),
             check_not_found_counts_for_descendant(),
             check_worker_error_skipped(),
+            check_quorum_only_counts_valid_responses(),
             check_not_found_replies_are_removed_when_doc_found(),
             check_not_found_returned_when_one_of_docs_not_found(),
             check_not_found_returned_when_doc_not_found()
@@ -469,6 +477,20 @@ check_worker_error_skipped() ->
     end).
 
 
+check_quorum_only_counts_valid_responses() ->
+    ?_test(begin
+        S0 = state0(revs(), true),
+        Msg1 = {rexi_EXIT, reason},
+        Msg2 = {rexi_EXIT, reason},
+        Msg3 = {ok, [foo1(), bar1(), baz1()]},
+        Expect = {stop, [bar1(), baz1(), foo1()]},
+
+        {ok, S1} = handle_message(Msg1, w1, S0),
+        {ok, S2} = handle_message(Msg2, w2, S1),
+        ?assertEqual(Expect, handle_message(Msg3, w3, S2))
+    end).
+
+
 check_not_found_replies_are_removed_when_doc_found() ->
     ?_test(begin
         Replies = replies_to_dict([foo1(), bar1(), fooNF()]),