You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by mp...@apache.org on 2017/12/02 01:50:41 UTC

[3/4] kudu git commit: KUDU-1097. Never evict a leader

KUDU-1097. Never evict a leader

The master should never try to evict the leader replica when the tablet
is over-replicated and arbitrary voters are eligible for eviction. This
patch ensures that is the case.

Change-Id: I57adc8c4c4a6a3b5ad0c0b9202407a4dd9e1e8b0
Reviewed-on: http://gerrit.cloudera.org:8080/8680
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin <as...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/4d44c9fe
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/4d44c9fe
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/4d44c9fe

Branch: refs/heads/master
Commit: 4d44c9fe756d92997f71e5d8f2a25443afef0045
Parents: d1d3560
Author: Mike Percy <mp...@apache.org>
Authored: Wed Nov 29 02:56:58 2017 -0800
Committer: Mike Percy <mp...@apache.org>
Committed: Sat Dec 2 01:45:37 2017 +0000

----------------------------------------------------------------------
 src/kudu/consensus/quorum_util-test.cc | 102 ++++++++++++++++------------
 src/kudu/consensus/quorum_util.cc      |  13 ++++
 src/kudu/consensus/quorum_util.h       |  10 +--
 src/kudu/master/catalog_manager.cc     |   3 +-
 4 files changed, 80 insertions(+), 48 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/4d44c9fe/src/kudu/consensus/quorum_util-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/quorum_util-test.cc b/src/kudu/consensus/quorum_util-test.cc
index 461ae7a..cd038fc 100644
--- a/src/kudu/consensus/quorum_util-test.cc
+++ b/src/kudu/consensus/quorum_util-test.cc
@@ -410,16 +410,16 @@ TEST(QuorumUtilTest, CanEvictReplicaVoters) {
     AddPeer(&config, "A", V);
     AddPeer(&config, "B", V);
     AddPeer(&config, "C", V);
-    EXPECT_FALSE(CanEvictReplica(config, 3));
-    EXPECT_FALSE(CanEvictReplica(config, 2));
+    EXPECT_FALSE(CanEvictReplica(config, "", 3));
+    EXPECT_FALSE(CanEvictReplica(config, "", 2));
   }
   {
     RaftConfigPB config;
     AddPeer(&config, "A", V, '?');
     AddPeer(&config, "B", V, '?');
     AddPeer(&config, "C", V, '-');
-    EXPECT_FALSE(CanEvictReplica(config, 3));
-    EXPECT_FALSE(CanEvictReplica(config, 2));
+    EXPECT_FALSE(CanEvictReplica(config, "", 3));
+    EXPECT_FALSE(CanEvictReplica(config, "", 2));
   }
   {
     RaftConfigPB config;
@@ -427,18 +427,18 @@ TEST(QuorumUtilTest, CanEvictReplicaVoters) {
     AddPeer(&config, "B", V, '-');
     AddPeer(&config, "C", V, '+');
     string uuid_to_evict;
-    ASSERT_TRUE(CanEvictReplica(config, 1, &uuid_to_evict));
+    ASSERT_TRUE(CanEvictReplica(config, "C", 1, &uuid_to_evict));
     EXPECT_EQ("B", uuid_to_evict);
-    EXPECT_FALSE(CanEvictReplica(config, 3));
-    EXPECT_FALSE(CanEvictReplica(config, 2));
+    EXPECT_FALSE(CanEvictReplica(config, "C", 3));
+    EXPECT_FALSE(CanEvictReplica(config, "C", 2));
   }
   {
     RaftConfigPB config;
     AddPeer(&config, "A", V, '+', {{"REPLACE", true}});
     AddPeer(&config, "B", V);
     AddPeer(&config, "C", V);
-    EXPECT_FALSE(CanEvictReplica(config, 3));
-    EXPECT_FALSE(CanEvictReplica(config, 2));
+    EXPECT_FALSE(CanEvictReplica(config, "A", 3));
+    EXPECT_FALSE(CanEvictReplica(config, "A", 2));
   }
   {
     RaftConfigPB config;
@@ -446,9 +446,9 @@ TEST(QuorumUtilTest, CanEvictReplicaVoters) {
     AddPeer(&config, "B", V, '+', {{"REPLACE", false}});
     AddPeer(&config, "C", V, '-');
     AddPeer(&config, "D", V, '+');
-    EXPECT_FALSE(CanEvictReplica(config, 4));
+    EXPECT_FALSE(CanEvictReplica(config, "A", 4));
     string uuid_to_evict;
-    ASSERT_TRUE(CanEvictReplica(config, 3, &uuid_to_evict));
+    ASSERT_TRUE(CanEvictReplica(config, "A", 3, &uuid_to_evict));
     EXPECT_EQ("C", uuid_to_evict);
   }
   {
@@ -457,9 +457,9 @@ TEST(QuorumUtilTest, CanEvictReplicaVoters) {
     AddPeer(&config, "B", V, '?');
     AddPeer(&config, "C", V, '+');
     AddPeer(&config, "D", V, '+');
-    EXPECT_FALSE(CanEvictReplica(config, 4));
+    EXPECT_FALSE(CanEvictReplica(config, "A", 4));
     string uuid_to_evict;
-    ASSERT_TRUE(CanEvictReplica(config, 3, &uuid_to_evict));
+    ASSERT_TRUE(CanEvictReplica(config, "A", 3, &uuid_to_evict));
     EXPECT_EQ("B", uuid_to_evict);
   }
   {
@@ -468,9 +468,9 @@ TEST(QuorumUtilTest, CanEvictReplicaVoters) {
     AddPeer(&config, "B", V, '+');
     AddPeer(&config, "C", V, '-');
     AddPeer(&config, "D", V, '?', {{"REPLACE", true}});
-    EXPECT_FALSE(CanEvictReplica(config, 3));
+    EXPECT_FALSE(CanEvictReplica(config, "A", 3));
     string uuid_to_evict;
-    ASSERT_TRUE(CanEvictReplica(config, 2, &uuid_to_evict));
+    ASSERT_TRUE(CanEvictReplica(config, "A", 2, &uuid_to_evict));
     EXPECT_EQ("D", uuid_to_evict);
   }
   {
@@ -479,9 +479,9 @@ TEST(QuorumUtilTest, CanEvictReplicaVoters) {
     AddPeer(&config, "B", V, '+');
     AddPeer(&config, "C", V, '-');
     AddPeer(&config, "D", V, '-', {{"REPLACE", true}});
-    EXPECT_FALSE(CanEvictReplica(config, 3));
+    EXPECT_FALSE(CanEvictReplica(config, "A", 3));
     string uuid_to_evict;
-    ASSERT_TRUE(CanEvictReplica(config, 2, &uuid_to_evict));
+    ASSERT_TRUE(CanEvictReplica(config, "A", 2, &uuid_to_evict));
     EXPECT_EQ("D", uuid_to_evict);
   }
   {
@@ -490,9 +490,9 @@ TEST(QuorumUtilTest, CanEvictReplicaVoters) {
     AddPeer(&config, "B", V, '+');
     AddPeer(&config, "C", V, '-');
     AddPeer(&config, "D", V, '+', {{"REPLACE", true}});
-    EXPECT_FALSE(CanEvictReplica(config, 3));
+    EXPECT_FALSE(CanEvictReplica(config, "A", 3));
     string uuid_to_evict;
-    ASSERT_TRUE(CanEvictReplica(config, 2, &uuid_to_evict));
+    ASSERT_TRUE(CanEvictReplica(config, "A", 2, &uuid_to_evict));
     EXPECT_EQ("D", uuid_to_evict);
   }
   {
@@ -501,9 +501,9 @@ TEST(QuorumUtilTest, CanEvictReplicaVoters) {
     AddPeer(&config, "B", V, '+');
     AddPeer(&config, "C", V, '?');
     AddPeer(&config, "D", V, '+', {{"REPLACE", true}});
-    EXPECT_FALSE(CanEvictReplica(config, 3));
+    EXPECT_FALSE(CanEvictReplica(config, "A", 3));
     string uuid_to_evict;
-    ASSERT_TRUE(CanEvictReplica(config, 2, &uuid_to_evict));
+    ASSERT_TRUE(CanEvictReplica(config, "A", 2, &uuid_to_evict));
     EXPECT_EQ("D", uuid_to_evict);
   }
   {
@@ -512,9 +512,9 @@ TEST(QuorumUtilTest, CanEvictReplicaVoters) {
     AddPeer(&config, "B", V, '?');
     AddPeer(&config, "C", V, '+');
     AddPeer(&config, "D", V, '+');
-    EXPECT_FALSE(CanEvictReplica(config, 4));
+    EXPECT_FALSE(CanEvictReplica(config, "A", 4));
     string uuid_to_evict;
-    ASSERT_TRUE(CanEvictReplica(config, 3, &uuid_to_evict));
+    ASSERT_TRUE(CanEvictReplica(config, "A", 3, &uuid_to_evict));
     EXPECT_EQ("B", uuid_to_evict);
   }
 }
@@ -525,20 +525,20 @@ TEST(QuorumUtilTest, CanEvictReplicaNonVoters) {
   {
     RaftConfigPB config;
     AddPeer(&config, "A", V);
-    EXPECT_FALSE(CanEvictReplica(config, 1));
+    EXPECT_FALSE(CanEvictReplica(config, "", 1));
   }
   {
     RaftConfigPB config;
     AddPeer(&config, "A", V, '+');
-    EXPECT_FALSE(CanEvictReplica(config, 1));
+    EXPECT_FALSE(CanEvictReplica(config, "A", 1));
   }
   {
     RaftConfigPB config;
     AddPeer(&config, "A", V, '+');
     AddPeer(&config, "B", N);
-    EXPECT_FALSE(CanEvictReplica(config, 2));
+    EXPECT_FALSE(CanEvictReplica(config, "A", 2));
     string uuid_to_evict;
-    ASSERT_TRUE(CanEvictReplica(config, 1, &uuid_to_evict));
+    ASSERT_TRUE(CanEvictReplica(config, "A", 1, &uuid_to_evict));
     EXPECT_EQ("B", uuid_to_evict);
   }
   {
@@ -547,18 +547,18 @@ TEST(QuorumUtilTest, CanEvictReplicaNonVoters) {
     AddPeer(&config, "B", V, '+');
     AddPeer(&config, "C", N, '+');
     string uuid_to_evict;
-    ASSERT_TRUE(CanEvictReplica(config, 2, &uuid_to_evict));
+    ASSERT_TRUE(CanEvictReplica(config, "A", 2, &uuid_to_evict));
     EXPECT_EQ("C", uuid_to_evict);
-    ASSERT_TRUE(CanEvictReplica(config, 1, &uuid_to_evict));
+    ASSERT_TRUE(CanEvictReplica(config, "A", 1, &uuid_to_evict));
     EXPECT_EQ("C", uuid_to_evict);
   }
   {
     RaftConfigPB config;
     AddPeer(&config, "A", V, '+');
     AddPeer(&config, "B", N, '-', {{"PROMOTE", true}});
-    EXPECT_FALSE(CanEvictReplica(config, 2));
+    EXPECT_FALSE(CanEvictReplica(config, "A", 2));
     string uuid_to_evict;
-    ASSERT_TRUE(CanEvictReplica(config, 1, &uuid_to_evict));
+    ASSERT_TRUE(CanEvictReplica(config, "A", 1, &uuid_to_evict));
     EXPECT_EQ("B", uuid_to_evict);
   }
   {
@@ -566,9 +566,9 @@ TEST(QuorumUtilTest, CanEvictReplicaNonVoters) {
     AddPeer(&config, "A", V, '+');
     AddPeer(&config, "B", N, '-');
     AddPeer(&config, "C", N);
-    EXPECT_FALSE(CanEvictReplica(config, 2));
+    EXPECT_FALSE(CanEvictReplica(config, "A", 2));
     string uuid_to_evict;
-    ASSERT_TRUE(CanEvictReplica(config, 1, &uuid_to_evict));
+    ASSERT_TRUE(CanEvictReplica(config, "A", 1, &uuid_to_evict));
     EXPECT_EQ("B", uuid_to_evict);
   }
   {
@@ -576,9 +576,9 @@ TEST(QuorumUtilTest, CanEvictReplicaNonVoters) {
     AddPeer(&config, "A", V, '+');
     AddPeer(&config, "B", N, '?');
     AddPeer(&config, "C", N, '+');
-    EXPECT_FALSE(CanEvictReplica(config, 2));
+    EXPECT_FALSE(CanEvictReplica(config, "A", 2));
     string uuid_to_evict;
-    ASSERT_TRUE(CanEvictReplica(config, 1, &uuid_to_evict));
+    ASSERT_TRUE(CanEvictReplica(config, "A", 1, &uuid_to_evict));
     EXPECT_EQ("B", uuid_to_evict);
   }
   {
@@ -586,9 +586,9 @@ TEST(QuorumUtilTest, CanEvictReplicaNonVoters) {
     AddPeer(&config, "A", V, '+');
     AddPeer(&config, "B", V);
     AddPeer(&config, "C", N);
-    EXPECT_FALSE(CanEvictReplica(config, 2));
+    EXPECT_FALSE(CanEvictReplica(config, "A", 2));
     string uuid_to_evict;
-    ASSERT_TRUE(CanEvictReplica(config, 1, &uuid_to_evict));
+    ASSERT_TRUE(CanEvictReplica(config, "A", 1, &uuid_to_evict));
     EXPECT_EQ("C", uuid_to_evict);
   }
   {
@@ -596,17 +596,17 @@ TEST(QuorumUtilTest, CanEvictReplicaNonVoters) {
     AddPeer(&config, "A", V, '-');
     AddPeer(&config, "B", V);
     AddPeer(&config, "C", N);
-    EXPECT_FALSE(CanEvictReplica(config, 2));
-    EXPECT_FALSE(CanEvictReplica(config, 1));
+    EXPECT_FALSE(CanEvictReplica(config, "", 2));
+    EXPECT_FALSE(CanEvictReplica(config, "", 1));
   }
   {
     RaftConfigPB config;
     AddPeer(&config, "A", V, '+');
     AddPeer(&config, "B", V, '-');
     AddPeer(&config, "C", N, '-', {{"PROMOTE", true}});
-    EXPECT_FALSE(CanEvictReplica(config, 2));
+    EXPECT_FALSE(CanEvictReplica(config, "A", 2));
     string uuid_to_evict;
-    ASSERT_TRUE(CanEvictReplica(config, 1, &uuid_to_evict));
+    ASSERT_TRUE(CanEvictReplica(config, "A", 1, &uuid_to_evict));
   }
   {
     RaftConfigPB config;
@@ -614,12 +614,28 @@ TEST(QuorumUtilTest, CanEvictReplicaNonVoters) {
     AddPeer(&config, "B", V, '+');
     AddPeer(&config, "C", V, '+');
     AddPeer(&config, "D", N, '+', {{"PROMOTE", true}});
-    EXPECT_FALSE(CanEvictReplica(config, 3));
+    EXPECT_FALSE(CanEvictReplica(config, "B", 3));
     string uuid_to_evict;
-    ASSERT_TRUE(CanEvictReplica(config, 2, &uuid_to_evict));
+    ASSERT_TRUE(CanEvictReplica(config, "B", 2, &uuid_to_evict));
     EXPECT_EQ("D", uuid_to_evict);
   }
 }
 
+// Exhaustively loop through all nodes, each as leader, when over-replicated
+// and ensure that the leader never gets evicted.
+TEST(QuorumUtilTest, TestDontEvictLeader) {
+  const vector<string> nodes = { "A", "B", "C", "D" };
+  RaftConfigPB config;
+  AddPeer(&config, nodes[0], V, '+');
+  AddPeer(&config, nodes[1], V, '+');
+  AddPeer(&config, nodes[2], V, '+');
+  AddPeer(&config, nodes[3], V, '+');
+  for (const auto& leader_node : nodes) {
+    string to_evict;
+    ASSERT_TRUE(CanEvictReplica(config, leader_node, 3, &to_evict));
+    ASSERT_NE(leader_node, to_evict);
+  }
+}
+
 } // namespace consensus
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/4d44c9fe/src/kudu/consensus/quorum_util.cc
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/quorum_util.cc b/src/kudu/consensus/quorum_util.cc
index 8632e3c..c544256 100644
--- a/src/kudu/consensus/quorum_util.cc
+++ b/src/kudu/consensus/quorum_util.cc
@@ -439,8 +439,12 @@ bool IsUnderReplicated(const RaftConfigPB& config, int replication_factor) {
 
 // Whether there is an excess replica to evict.
 bool CanEvictReplica(const RaftConfigPB& config,
+                     const string& leader_uuid,
                      int replication_factor,
                      string* uuid_to_evict) {
+  // If there is no leader, we can't evict anybody.
+  if (leader_uuid.empty()) return false;
+
   int num_non_voters_total = 0;
 
   int num_voters_healthy = 0;
@@ -475,6 +479,13 @@ bool CanEvictReplica(const RaftConfigPB& config,
         if (healthy && !has_replace) {
           ++num_voters_healthy;
         }
+        // Everything below here is to check for replicas to evict.
+        if (peer_uuid == leader_uuid) {
+          // A leader should always report itself as being healthy.
+          DCHECK(healthy) << peer_uuid << ": " << SecureShortDebugString(config);
+          break;
+        }
+
         if (failed && has_replace) {
           voter_failed = voter_replace = peer_uuid;
         }
@@ -492,6 +503,8 @@ bool CanEvictReplica(const RaftConfigPB& config,
 
       case RaftPeerPB::NON_VOTER:
         ++num_non_voters_total;
+        DCHECK_NE(peer_uuid, leader_uuid) << peer_uuid
+            << ": non-voter as a leader; " << SecureShortDebugString(config);
         if (failed && non_voter_failed.empty()) {
           non_voter_failed = peer_uuid;
         }

http://git-wip-us.apache.org/repos/asf/kudu/blob/4d44c9fe/src/kudu/consensus/quorum_util.h
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/quorum_util.h b/src/kudu/consensus/quorum_util.h
index b0d12ae..9afe13f 100644
--- a/src/kudu/consensus/quorum_util.h
+++ b/src/kudu/consensus/quorum_util.h
@@ -94,11 +94,13 @@ bool IsUnderReplicated(const RaftConfigPB& config, int replication_factor);
 
 // Check if the given Raft configuration contains at least one extra replica
 // which can be removed in accordance with the specified replication
-// factor. If so, then return 'true' and set the UUID of the best suited
-// replica into the 'uuid_to_evict' out parameter. Otherwise, return 'false'.
+// factor and current Raft leader. If so, then return 'true' and set the UUID
+// of the best suited replica into the 'uuid_to_evict' out parameter. Otherwise,
+// return 'false'.
 bool CanEvictReplica(const RaftConfigPB& config,
-                         int replication_factor,
-                         std::string* uuid_to_evict = nullptr);
+                     const std::string& leader_uuid,
+                     int replication_factor,
+                     std::string* uuid_to_evict = nullptr);
 
 }  // namespace consensus
 }  // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/4d44c9fe/src/kudu/master/catalog_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc
index 8144d74..abfed16 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -3509,7 +3509,8 @@ Status CatalogManager::ProcessTabletReport(
         const RaftConfigPB& config = cstate.committed_config();
         string to_evict;
         if (PREDICT_TRUE(FLAGS_catalog_manager_evict_excess_replicas) &&
-                         CanEvictReplica(config, replication_factor, &to_evict)) {
+                         CanEvictReplica(config, cstate.leader_uuid(), replication_factor,
+                                         &to_evict)) {
           DCHECK(!to_evict.empty());
           rpcs.emplace_back(new AsyncEvictReplicaTask(
               master_, tablet, cstate, std::move(to_evict)));