You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by mp...@apache.org on 2017/12/23 19:07:50 UTC

kudu git commit: KUDU-1097: try adding a new replica when majority is on-line

Repository: kudu
Updated Branches:
  refs/heads/master a89c8f395 -> 63025ef6d


KUDU-1097: try adding a new replica when majority is on-line

With this patch, the consensus::ShouldAddReplica() utility function
checks whether a majority of voter replicas is on-line before attempting
to add a new non-voter replica into the tablet Raft configuration.  This
makes it more consistent with the behavior of the 'symmetric'
consensus::ShouldEvictReplica() function.

Change-Id: Ida198b96e76102f82d0b32116c45e41d26d15314
Reviewed-on: http://gerrit.cloudera.org:8080/8863
Tested-by: Kudu Jenkins
Reviewed-by: Mike Percy <mp...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/63025ef6
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/63025ef6
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/63025ef6

Branch: refs/heads/master
Commit: 63025ef6ddf580504e88d45bd588f2934a18bd4c
Parents: a89c8f3
Author: Alexey Serbin <as...@cloudera.com>
Authored: Mon Dec 18 11:12:50 2017 -0800
Committer: Mike Percy <mp...@apache.org>
Committed: Sat Dec 23 18:30:20 2017 +0000

----------------------------------------------------------------------
 src/kudu/consensus/quorum_util-test.cc | 66 +++++++++++++++++++++--------
 src/kudu/consensus/quorum_util.cc      | 21 +++++++--
 2 files changed, 67 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/63025ef6/src/kudu/consensus/quorum_util-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/quorum_util-test.cc b/src/kudu/consensus/quorum_util-test.cc
index 5e6ac0e..b238221 100644
--- a/src/kudu/consensus/quorum_util-test.cc
+++ b/src/kudu/consensus/quorum_util-test.cc
@@ -283,7 +283,9 @@ TEST(QuorumUtilTest, ShouldAddReplica) {
     AddPeer(&config, "C", V);
     EXPECT_FALSE(ShouldAddReplica(config, 2));
     EXPECT_FALSE(ShouldAddReplica(config, 3));
-    EXPECT_TRUE(ShouldAddReplica(config, 4));
+    // The configuration is under-replicated, but there are not enough healthy
+    // voters to commit the configuration change.
+    EXPECT_FALSE(ShouldAddReplica(config, 4));
   }
   {
     RaftConfigPB config;
@@ -292,7 +294,9 @@ TEST(QuorumUtilTest, ShouldAddReplica) {
     AddPeer(&config, "C", V, '?');
     EXPECT_FALSE(ShouldAddReplica(config, 2));
     EXPECT_FALSE(ShouldAddReplica(config, 3));
-    EXPECT_TRUE(ShouldAddReplica(config, 4));
+    // The configuration is under-replicated, but there are not enough healthy
+    // voters to commit the configuration change.
+    EXPECT_FALSE(ShouldAddReplica(config, 4));
   }
   {
     RaftConfigPB config;
@@ -300,7 +304,9 @@ TEST(QuorumUtilTest, ShouldAddReplica) {
     AddPeer(&config, "B", V, '?');
     AddPeer(&config, "C", V, '-');
     EXPECT_FALSE(ShouldAddReplica(config, 2));
-    EXPECT_TRUE(ShouldAddReplica(config, 3));
+    // The configuration is under-replicated, but there are not enough healthy
+    // voters to commit the configuration change.
+    EXPECT_FALSE(ShouldAddReplica(config, 3));
   }
   {
     RaftConfigPB config;
@@ -316,7 +322,9 @@ TEST(QuorumUtilTest, ShouldAddReplica) {
     AddPeer(&config, "B", V, '?');
     AddPeer(&config, "C", N, '+');
     EXPECT_FALSE(ShouldAddReplica(config, 2));
-    EXPECT_TRUE(ShouldAddReplica(config, 3));
+    // The configuration is under-replicated, but there are not enough healthy
+    // voters to commit the configuration change.
+    EXPECT_FALSE(ShouldAddReplica(config, 3));
   }
   {
     RaftConfigPB config;
@@ -324,8 +332,10 @@ TEST(QuorumUtilTest, ShouldAddReplica) {
     AddPeer(&config, "B", V, '-');
     AddPeer(&config, "C", N, '+');
     EXPECT_FALSE(ShouldAddReplica(config, 1));
-    EXPECT_TRUE(ShouldAddReplica(config, 2));
-    EXPECT_TRUE(ShouldAddReplica(config, 3));
+    // The configuration is under-replicated, but there are not enough healthy
+    // voters to commit the configuration change.
+    EXPECT_FALSE(ShouldAddReplica(config, 2));
+    EXPECT_FALSE(ShouldAddReplica(config, 3));
   }
   {
     RaftConfigPB config;
@@ -334,7 +344,9 @@ TEST(QuorumUtilTest, ShouldAddReplica) {
     AddPeer(&config, "C", N, '+', {{"PROMOTE", true}});
     EXPECT_FALSE(ShouldAddReplica(config, 1));
     EXPECT_FALSE(ShouldAddReplica(config, 2));
-    EXPECT_TRUE(ShouldAddReplica(config, 3));
+    // The configuration is under-replicated, but there are not enough healthy
+    // voters to commit the configuration change.
+    EXPECT_FALSE(ShouldAddReplica(config, 3));
   }
   {
     RaftConfigPB config;
@@ -342,7 +354,7 @@ TEST(QuorumUtilTest, ShouldAddReplica) {
     AddPeer(&config, "B", V, '-');
     AddPeer(&config, "C", N, '-', {{"PROMOTE", true}});
     EXPECT_FALSE(ShouldAddReplica(config, 1));
-    EXPECT_TRUE(ShouldAddReplica(config, 2));
+    EXPECT_FALSE(ShouldAddReplica(config, 2));
   }
   {
     RaftConfigPB config;
@@ -356,6 +368,16 @@ TEST(QuorumUtilTest, ShouldAddReplica) {
     RaftConfigPB config;
     AddPeer(&config, "A", V, '+');
     AddPeer(&config, "B", V, '+');
+    AddPeer(&config, "C", V, '?');
+    EXPECT_FALSE(ShouldAddReplica(config, 2));
+    // The catalog manager should wait for a definite health status of replica
+    // 'C' before making decision whether to add replica for replacement or not.
+    EXPECT_FALSE(ShouldAddReplica(config, 3));
+  }
+  {
+    RaftConfigPB config;
+    AddPeer(&config, "A", V, '+');
+    AddPeer(&config, "B", V, '+');
     AddPeer(&config, "C", V, '+', {{"REPLACE", true}});
     EXPECT_TRUE(ShouldAddReplica(config, 3));
     EXPECT_FALSE(ShouldAddReplica(config, 2));
@@ -443,10 +465,20 @@ TEST(QuorumUtilTest, ShouldAddReplica) {
     AddPeer(&config, "A", V, '+');
     AddPeer(&config, "B", V, '-');
     AddPeer(&config, "C", V, '-');
-    // The catalog manager will be able to carry on the required update of the
-    // configuration after achieving the majority.
-    // TODO(aserbin): add an integration test for that.
-    EXPECT_TRUE(ShouldAddReplica(config, 3));
+    // The catalog manager will not add a new non-voter replica until the
+    // situation is resolved -- this case requires manual intervention.
+    EXPECT_FALSE(ShouldAddReplica(config, 3));
+  }
+  {
+    RaftConfigPB config;
+    AddPeer(&config, "A", V, '+');
+    AddPeer(&config, "B", V, '-');
+    AddPeer(&config, "C", V, '+');
+    AddPeer(&config, "D", V, '-');
+    AddPeer(&config, "E", V, '+');
+    EXPECT_FALSE(ShouldAddReplica(config, 3));
+    EXPECT_TRUE(ShouldAddReplica(config, 4));
+    EXPECT_TRUE(ShouldAddReplica(config, 5));
   }
 }
 
@@ -760,9 +792,9 @@ TEST(QuorumUtilTest, TestDontEvictLeader) {
   }
 }
 
-// This is a testcase for tablet configurations which can simultaneously be
-// under-replicated and contain a replica suitable for eviction.
-TEST(QuorumUtilTest, TooManyVoters) {
+// This is a testcase for tablet configurations with more than the required
+// number of voter replicas but without a majority of replicas being on-line.
+TEST(QuorumUtilTest, TooManyVotersWithoutMajority) {
   {
     RaftConfigPB config;
     AddPeer(&config, "A", V, '+');
@@ -783,7 +815,7 @@ TEST(QuorumUtilTest, TooManyVoters) {
     string to_evict;
     ASSERT_TRUE(ShouldEvictReplica(config, "A", 3, &to_evict));
     EXPECT_TRUE(to_evict == "C" || to_evict == "D") << to_evict;
-    EXPECT_TRUE(ShouldAddReplica(config, 3));
+    EXPECT_FALSE(ShouldAddReplica(config, 3));
   }
 }
 
@@ -1195,7 +1227,7 @@ TEST(QuorumUtilTest, NewlyPromotedReplicaCrashes) {
   SetPeerHealth(&config, "D", '-');
   ASSERT_TRUE(ShouldEvictReplica(config, "A", kReplicationFactor, &to_evict));
   EXPECT_TRUE(to_evict == "B" || to_evict == "D") << to_evict;
-  EXPECT_TRUE(ShouldAddReplica(config, kReplicationFactor));
+  EXPECT_FALSE(ShouldAddReplica(config, kReplicationFactor));
 
   RemovePeer(&config, to_evict);
   EXPECT_FALSE(ShouldEvictReplica(config, "A", kReplicationFactor));

http://git-wip-us.apache.org/repos/asf/kudu/blob/63025ef6/src/kudu/consensus/quorum_util.cc
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/quorum_util.cc b/src/kudu/consensus/quorum_util.cc
index 8bf400d..c880395 100644
--- a/src/kudu/consensus/quorum_util.cc
+++ b/src/kudu/consensus/quorum_util.cc
@@ -403,6 +403,7 @@ string DiffConsensusStates(const ConsensusStatePB& old_state,
 //                the latter case.
 bool ShouldAddReplica(const RaftConfigPB& config, int replication_factor) {
   int num_voters_total = 0;
+  int num_voters_healthy = 0;
   int num_voters_need_replacement = 0;
   int num_non_voters_to_promote = 0;
 
@@ -418,6 +419,9 @@ bool ShouldAddReplica(const RaftConfigPB& config, int replication_factor) {
             peer.health_report().overall_health() == HealthReportPB::FAILED) {
           ++num_voters_need_replacement;
         }
+        if (peer.health_report().overall_health() == HealthReportPB::HEALTHY) {
+          ++num_voters_healthy;
+        }
         break;
       case RaftPeerPB::NON_VOTER:
         if (peer.attrs().promote() &&
@@ -430,11 +434,22 @@ bool ShouldAddReplica(const RaftConfigPB& config, int replication_factor) {
         break;
     }
   }
+
+  // Whether the configuration is under-replicated: the projected number of
+  // viable replicas is less than the required replication factor.
   const bool is_under_replicated = replication_factor >
-      (num_voters_total - num_voters_need_replacement) + num_non_voters_to_promote;
+      num_voters_total - num_voters_need_replacement + num_non_voters_to_promote;
+
+  // Whether it's time to add a new replica: the tablet Raft configuration might
+  // be under-replicated, but it does not make much sense trying to add a new
+  // replica if the configuration change cannot be committed.
+  const bool should_add_replica = is_under_replicated &&
+      num_voters_healthy >= MajoritySize(num_voters_total);
+
   VLOG(2) << "decision: the config is" << (is_under_replicated ? " " : " not ")
-          << "under-replicated";
-  return is_under_replicated;
+          << "under-replicated; should" << (should_add_replica ? " " : " not ")
+          << "add a non-voter replica";
+  return should_add_replica;
 }
 
 // Whether there is an excess replica to evict.