You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by mp...@apache.org on 2017/12/02 01:50:39 UTC

[1/4] kudu git commit: Disable flaky KUDU-1097 tablet move test

Repository: kudu
Updated Branches:
  refs/heads/master d9754746b -> 98f669d5d


Disable flaky KUDU-1097 tablet move test

This test is flaky because the leader may get marked with replace, which
is not currently reliably supported when KUDU-1097 is enabled.

Change-Id: I118accafd7da99c35beb5a4f4a9f164984f17baf
Reviewed-on: http://gerrit.cloudera.org:8080/8720
Reviewed-by: Alexey Serbin <as...@cloudera.com>
Tested-by: Mike Percy <mp...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/072685c2
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/072685c2
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/072685c2

Branch: refs/heads/master
Commit: 072685c207f878f9b824201d397e1a8a2eee6a11
Parents: d975474
Author: Mike Percy <mp...@apache.org>
Authored: Fri Dec 1 16:25:07 2017 -0800
Committer: Mike Percy <mp...@apache.org>
Committed: Sat Dec 2 00:59:38 2017 +0000

----------------------------------------------------------------------
 src/kudu/tools/kudu-admin-test.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/072685c2/src/kudu/tools/kudu-admin-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/kudu-admin-test.cc b/src/kudu/tools/kudu-admin-test.cc
index fe91dd9..5b7afaf 100644
--- a/src/kudu/tools/kudu-admin-test.cc
+++ b/src/kudu/tools/kudu-admin-test.cc
@@ -315,7 +315,7 @@ TEST_F(AdminCliTest, TestMoveTablet_pre_KUDU_1097) {
   DoTestMoveTablet(kDisableKudu1097);
 }
 
-TEST_F(AdminCliTest, TestMoveTablet_KUDU_1097) {
+TEST_F(AdminCliTest, DISABLED_TestMoveTablet_KUDU_1097) {
   DoTestMoveTablet(kEnableKudu1097);
 }
 


[2/4] kudu git commit: KUDU-1097. Only add/evict when processing leader tablet reports

Posted by mp...@apache.org.
KUDU-1097. Only add/evict when processing leader tablet reports

The leader replica is the only replica that sends a health report. The
master needs the health report to make an eviction decision. Thus, we
should only consider eviction or addition of a new replica if the tablet
report was received from the leader.

There is no standalone test for this but this patch is required for the
following patch, "Never evict a leader", to pass due to the DCHECK
introduced in that patch.

Change-Id: If7e318e042cd27ff4544bc5aae76db4ab9e51da1
Reviewed-on: http://gerrit.cloudera.org:8080/8682
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin <as...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/d1d35601
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/d1d35601
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/d1d35601

Branch: refs/heads/master
Commit: d1d35601aa2b281880f6612ddef499686cf8233c
Parents: 072685c
Author: Mike Percy <mp...@apache.org>
Authored: Wed Nov 29 04:24:13 2017 -0800
Committer: Mike Percy <mp...@apache.org>
Committed: Sat Dec 2 01:45:29 2017 +0000

----------------------------------------------------------------------
 src/kudu/master/catalog_manager.cc | 44 ++++++++++++++++++---------------
 1 file changed, 24 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/d1d35601/src/kudu/master/catalog_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc
index beb4302..8144d74 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -3488,32 +3488,36 @@ Status CatalogManager::ProcessTabletReport(
       // 7e. Make tablet configuration change depending on the mode the server
       // is running with. The choice between two alternative modes is controlled
       // by the 'raft_prepare_replacement_before_eviction' run-time flag.
-      if (FLAGS_raft_prepare_replacement_before_eviction) {
-        // An alternative scheme of managing tablet replicas: the catalog
-        // manager processes the health-related info on replicas from the tablet
-        // report and initiates appropriate modifications for the tablet Raft
-        // configuration: evict an already-replaced failed voter replica or add
-        // a new non-voter replica marked for promotion as a replacement.
+      if (!FLAGS_raft_prepare_replacement_before_eviction) {
+        if (consensus_state_updated &&
+            FLAGS_master_add_server_when_underreplicated &&
+            CountVoters(cstate.committed_config()) < replication_factor) {
+          // Add a server to the config if it is under-replicated.
+          //
+          // This is an idempotent operation due to a CAS enforced on the
+          // committed config's opid_index.
+          rpcs.emplace_back(new AsyncAddReplicaTask(
+              master_, tablet, cstate, RaftPeerPB::VOTER, &rng_));
+        }
+
+      // When --raft_prepare_replacement_before_eviction is enabled, we
+      // consider whether to add or evict replicas based on the health report
+      // included in the leader's tablet report. Since only the leader tracks
+      // health, we ignore reports from non-leaders in this case.
+      } else if (!cstate.leader_uuid().empty() &&
+                 ts_desc->permanent_uuid() == cstate.leader_uuid()) {
         const RaftConfigPB& config = cstate.committed_config();
         string to_evict;
-        if (IsUnderReplicated(config, replication_factor)) {
-          rpcs.emplace_back(new AsyncAddReplicaTask(
-              master_, tablet, cstate, RaftPeerPB::NON_VOTER, &rng_));
-        } else if (PREDICT_TRUE(FLAGS_catalog_manager_evict_excess_replicas) &&
-                   CanEvictReplica(config, replication_factor, &to_evict)) {
+        if (PREDICT_TRUE(FLAGS_catalog_manager_evict_excess_replicas) &&
+                         CanEvictReplica(config, replication_factor, &to_evict)) {
           DCHECK(!to_evict.empty());
           rpcs.emplace_back(new AsyncEvictReplicaTask(
               master_, tablet, cstate, std::move(to_evict)));
+        } else if (FLAGS_master_add_server_when_underreplicated &&
+                   IsUnderReplicated(config, replication_factor)) {
+          rpcs.emplace_back(new AsyncAddReplicaTask(
+              master_, tablet, cstate, RaftPeerPB::NON_VOTER, &rng_));
         }
-      } else if (consensus_state_updated &&
-                 FLAGS_master_add_server_when_underreplicated &&
-                 CountVoters(cstate.committed_config()) < replication_factor) {
-        // Add a server to the config if it is under-replicated.
-        //
-        // This is an idempotent operation due to a CAS enforced on the
-        // committed config's opid_index.
-        rpcs.emplace_back(new AsyncAddReplicaTask(
-            master_, tablet, cstate, RaftPeerPB::VOTER, &rng_));
       }
     }
 


[4/4] kudu git commit: KUDU-1097. master: strip health reports from cstate before persisting

Posted by mp...@apache.org.
KUDU-1097. master: strip health reports from cstate before persisting

The master should not persist configs with health reports in them
because the health reports are inherently transient information.

This patch strips the health reports from the cstate included in each
leader tablet report before persisting them and adds a couple of DCHECKS
in random places to enforce that.

Change-Id: I3e0b076146d888d01676442f6ab3f67cca6bacd6
Reviewed-on: http://gerrit.cloudera.org:8080/8683
Reviewed-by: Alexey Serbin <as...@cloudera.com>
Tested-by: Kudu Jenkins


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/98f669d5
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/98f669d5
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/98f669d5

Branch: refs/heads/master
Commit: 98f669d5d125ac34da2df794ddb7725c4ffab78b
Parents: 4d44c9f
Author: Mike Percy <mp...@apache.org>
Authored: Wed Nov 29 04:25:12 2017 -0800
Committer: Mike Percy <mp...@apache.org>
Committed: Sat Dec 2 01:45:45 2017 +0000

----------------------------------------------------------------------
 src/kudu/master/catalog_manager.cc | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/98f669d5/src/kudu/master/catalog_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc
index abfed16..acea922 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -3461,8 +3461,16 @@ Status CatalogManager::ProcessTabletReport(
             tablet_id, ts_desc->ToString(), cstate.committed_config().opid_index(),
             cstate.current_term());
 
+
         // 7d(ii). Update the consensus state.
-        *tablet->mutable_metadata()->mutable_dirty()->pb.mutable_consensus_state() = cstate;
+        // Strip the health report from the cstate before persisting it.
+        auto* dirty_cstate =
+            tablet->mutable_metadata()->mutable_dirty()->pb.mutable_consensus_state();
+        *dirty_cstate = cstate; // Copy in the updated cstate.
+        // Strip out the health reports from the persisted copy *only*.
+        for (auto& peer : *dirty_cstate->mutable_committed_config()->mutable_peers()) {
+          peer.clear_health_report();
+        }
         tablet_was_mutated = true;
 
         // 7d(iii). Delete any replicas from the previous config that are not
@@ -3473,6 +3481,7 @@ Status CatalogManager::ProcessTabletReport(
             InsertOrDie(&current_member_uuids, p.permanent_uuid());
           }
           for (const auto& p : prev_cstate.committed_config().peers()) {
+            DCHECK(!p.has_health_report()); // Health report shouldn't be persisted.
             const string& peer_uuid = p.permanent_uuid();
             if (!ContainsKey(current_member_uuids, peer_uuid)) {
               rpcs.emplace_back(new AsyncDeleteReplica(
@@ -4044,6 +4053,7 @@ Status CatalogManager::BuildLocationsForTablet(
 
   const ConsensusStatePB& cstate = l_tablet.data().pb.consensus_state();
   for (const consensus::RaftPeerPB& peer : cstate.committed_config().peers()) {
+    DCHECK(!peer.has_health_report()); // Health report shouldn't be persisted.
     // TODO(adar): GetConsensusRole() iterates over all of the peers, making this an
     // O(n^2) loop. If replication counts get high, it should be optimized.
     switch (filter) {


[3/4] kudu git commit: KUDU-1097. Never evict a leader

Posted by mp...@apache.org.
KUDU-1097. Never evict a leader

The master should never try to evict the leader replica when the tablet
is over-replicated and arbitrary voters are eligible for eviction. This
patch ensures that is the case.

Change-Id: I57adc8c4c4a6a3b5ad0c0b9202407a4dd9e1e8b0
Reviewed-on: http://gerrit.cloudera.org:8080/8680
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin <as...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/4d44c9fe
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/4d44c9fe
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/4d44c9fe

Branch: refs/heads/master
Commit: 4d44c9fe756d92997f71e5d8f2a25443afef0045
Parents: d1d3560
Author: Mike Percy <mp...@apache.org>
Authored: Wed Nov 29 02:56:58 2017 -0800
Committer: Mike Percy <mp...@apache.org>
Committed: Sat Dec 2 01:45:37 2017 +0000

----------------------------------------------------------------------
 src/kudu/consensus/quorum_util-test.cc | 102 ++++++++++++++++------------
 src/kudu/consensus/quorum_util.cc      |  13 ++++
 src/kudu/consensus/quorum_util.h       |  10 +--
 src/kudu/master/catalog_manager.cc     |   3 +-
 4 files changed, 80 insertions(+), 48 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/4d44c9fe/src/kudu/consensus/quorum_util-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/quorum_util-test.cc b/src/kudu/consensus/quorum_util-test.cc
index 461ae7a..cd038fc 100644
--- a/src/kudu/consensus/quorum_util-test.cc
+++ b/src/kudu/consensus/quorum_util-test.cc
@@ -410,16 +410,16 @@ TEST(QuorumUtilTest, CanEvictReplicaVoters) {
     AddPeer(&config, "A", V);
     AddPeer(&config, "B", V);
     AddPeer(&config, "C", V);
-    EXPECT_FALSE(CanEvictReplica(config, 3));
-    EXPECT_FALSE(CanEvictReplica(config, 2));
+    EXPECT_FALSE(CanEvictReplica(config, "", 3));
+    EXPECT_FALSE(CanEvictReplica(config, "", 2));
   }
   {
     RaftConfigPB config;
     AddPeer(&config, "A", V, '?');
     AddPeer(&config, "B", V, '?');
     AddPeer(&config, "C", V, '-');
-    EXPECT_FALSE(CanEvictReplica(config, 3));
-    EXPECT_FALSE(CanEvictReplica(config, 2));
+    EXPECT_FALSE(CanEvictReplica(config, "", 3));
+    EXPECT_FALSE(CanEvictReplica(config, "", 2));
   }
   {
     RaftConfigPB config;
@@ -427,18 +427,18 @@ TEST(QuorumUtilTest, CanEvictReplicaVoters) {
     AddPeer(&config, "B", V, '-');
     AddPeer(&config, "C", V, '+');
     string uuid_to_evict;
-    ASSERT_TRUE(CanEvictReplica(config, 1, &uuid_to_evict));
+    ASSERT_TRUE(CanEvictReplica(config, "C", 1, &uuid_to_evict));
     EXPECT_EQ("B", uuid_to_evict);
-    EXPECT_FALSE(CanEvictReplica(config, 3));
-    EXPECT_FALSE(CanEvictReplica(config, 2));
+    EXPECT_FALSE(CanEvictReplica(config, "C", 3));
+    EXPECT_FALSE(CanEvictReplica(config, "C", 2));
   }
   {
     RaftConfigPB config;
     AddPeer(&config, "A", V, '+', {{"REPLACE", true}});
     AddPeer(&config, "B", V);
     AddPeer(&config, "C", V);
-    EXPECT_FALSE(CanEvictReplica(config, 3));
-    EXPECT_FALSE(CanEvictReplica(config, 2));
+    EXPECT_FALSE(CanEvictReplica(config, "A", 3));
+    EXPECT_FALSE(CanEvictReplica(config, "A", 2));
   }
   {
     RaftConfigPB config;
@@ -446,9 +446,9 @@ TEST(QuorumUtilTest, CanEvictReplicaVoters) {
     AddPeer(&config, "B", V, '+', {{"REPLACE", false}});
     AddPeer(&config, "C", V, '-');
     AddPeer(&config, "D", V, '+');
-    EXPECT_FALSE(CanEvictReplica(config, 4));
+    EXPECT_FALSE(CanEvictReplica(config, "A", 4));
     string uuid_to_evict;
-    ASSERT_TRUE(CanEvictReplica(config, 3, &uuid_to_evict));
+    ASSERT_TRUE(CanEvictReplica(config, "A", 3, &uuid_to_evict));
     EXPECT_EQ("C", uuid_to_evict);
   }
   {
@@ -457,9 +457,9 @@ TEST(QuorumUtilTest, CanEvictReplicaVoters) {
     AddPeer(&config, "B", V, '?');
     AddPeer(&config, "C", V, '+');
     AddPeer(&config, "D", V, '+');
-    EXPECT_FALSE(CanEvictReplica(config, 4));
+    EXPECT_FALSE(CanEvictReplica(config, "A", 4));
     string uuid_to_evict;
-    ASSERT_TRUE(CanEvictReplica(config, 3, &uuid_to_evict));
+    ASSERT_TRUE(CanEvictReplica(config, "A", 3, &uuid_to_evict));
     EXPECT_EQ("B", uuid_to_evict);
   }
   {
@@ -468,9 +468,9 @@ TEST(QuorumUtilTest, CanEvictReplicaVoters) {
     AddPeer(&config, "B", V, '+');
     AddPeer(&config, "C", V, '-');
     AddPeer(&config, "D", V, '?', {{"REPLACE", true}});
-    EXPECT_FALSE(CanEvictReplica(config, 3));
+    EXPECT_FALSE(CanEvictReplica(config, "A", 3));
     string uuid_to_evict;
-    ASSERT_TRUE(CanEvictReplica(config, 2, &uuid_to_evict));
+    ASSERT_TRUE(CanEvictReplica(config, "A", 2, &uuid_to_evict));
     EXPECT_EQ("D", uuid_to_evict);
   }
   {
@@ -479,9 +479,9 @@ TEST(QuorumUtilTest, CanEvictReplicaVoters) {
     AddPeer(&config, "B", V, '+');
     AddPeer(&config, "C", V, '-');
     AddPeer(&config, "D", V, '-', {{"REPLACE", true}});
-    EXPECT_FALSE(CanEvictReplica(config, 3));
+    EXPECT_FALSE(CanEvictReplica(config, "A", 3));
     string uuid_to_evict;
-    ASSERT_TRUE(CanEvictReplica(config, 2, &uuid_to_evict));
+    ASSERT_TRUE(CanEvictReplica(config, "A", 2, &uuid_to_evict));
     EXPECT_EQ("D", uuid_to_evict);
   }
   {
@@ -490,9 +490,9 @@ TEST(QuorumUtilTest, CanEvictReplicaVoters) {
     AddPeer(&config, "B", V, '+');
     AddPeer(&config, "C", V, '-');
     AddPeer(&config, "D", V, '+', {{"REPLACE", true}});
-    EXPECT_FALSE(CanEvictReplica(config, 3));
+    EXPECT_FALSE(CanEvictReplica(config, "A", 3));
     string uuid_to_evict;
-    ASSERT_TRUE(CanEvictReplica(config, 2, &uuid_to_evict));
+    ASSERT_TRUE(CanEvictReplica(config, "A", 2, &uuid_to_evict));
     EXPECT_EQ("D", uuid_to_evict);
   }
   {
@@ -501,9 +501,9 @@ TEST(QuorumUtilTest, CanEvictReplicaVoters) {
     AddPeer(&config, "B", V, '+');
     AddPeer(&config, "C", V, '?');
     AddPeer(&config, "D", V, '+', {{"REPLACE", true}});
-    EXPECT_FALSE(CanEvictReplica(config, 3));
+    EXPECT_FALSE(CanEvictReplica(config, "A", 3));
     string uuid_to_evict;
-    ASSERT_TRUE(CanEvictReplica(config, 2, &uuid_to_evict));
+    ASSERT_TRUE(CanEvictReplica(config, "A", 2, &uuid_to_evict));
     EXPECT_EQ("D", uuid_to_evict);
   }
   {
@@ -512,9 +512,9 @@ TEST(QuorumUtilTest, CanEvictReplicaVoters) {
     AddPeer(&config, "B", V, '?');
     AddPeer(&config, "C", V, '+');
     AddPeer(&config, "D", V, '+');
-    EXPECT_FALSE(CanEvictReplica(config, 4));
+    EXPECT_FALSE(CanEvictReplica(config, "A", 4));
     string uuid_to_evict;
-    ASSERT_TRUE(CanEvictReplica(config, 3, &uuid_to_evict));
+    ASSERT_TRUE(CanEvictReplica(config, "A", 3, &uuid_to_evict));
     EXPECT_EQ("B", uuid_to_evict);
   }
 }
@@ -525,20 +525,20 @@ TEST(QuorumUtilTest, CanEvictReplicaNonVoters) {
   {
     RaftConfigPB config;
     AddPeer(&config, "A", V);
-    EXPECT_FALSE(CanEvictReplica(config, 1));
+    EXPECT_FALSE(CanEvictReplica(config, "", 1));
   }
   {
     RaftConfigPB config;
     AddPeer(&config, "A", V, '+');
-    EXPECT_FALSE(CanEvictReplica(config, 1));
+    EXPECT_FALSE(CanEvictReplica(config, "A", 1));
   }
   {
     RaftConfigPB config;
     AddPeer(&config, "A", V, '+');
     AddPeer(&config, "B", N);
-    EXPECT_FALSE(CanEvictReplica(config, 2));
+    EXPECT_FALSE(CanEvictReplica(config, "A", 2));
     string uuid_to_evict;
-    ASSERT_TRUE(CanEvictReplica(config, 1, &uuid_to_evict));
+    ASSERT_TRUE(CanEvictReplica(config, "A", 1, &uuid_to_evict));
     EXPECT_EQ("B", uuid_to_evict);
   }
   {
@@ -547,18 +547,18 @@ TEST(QuorumUtilTest, CanEvictReplicaNonVoters) {
     AddPeer(&config, "B", V, '+');
     AddPeer(&config, "C", N, '+');
     string uuid_to_evict;
-    ASSERT_TRUE(CanEvictReplica(config, 2, &uuid_to_evict));
+    ASSERT_TRUE(CanEvictReplica(config, "A", 2, &uuid_to_evict));
     EXPECT_EQ("C", uuid_to_evict);
-    ASSERT_TRUE(CanEvictReplica(config, 1, &uuid_to_evict));
+    ASSERT_TRUE(CanEvictReplica(config, "A", 1, &uuid_to_evict));
     EXPECT_EQ("C", uuid_to_evict);
   }
   {
     RaftConfigPB config;
     AddPeer(&config, "A", V, '+');
     AddPeer(&config, "B", N, '-', {{"PROMOTE", true}});
-    EXPECT_FALSE(CanEvictReplica(config, 2));
+    EXPECT_FALSE(CanEvictReplica(config, "A", 2));
     string uuid_to_evict;
-    ASSERT_TRUE(CanEvictReplica(config, 1, &uuid_to_evict));
+    ASSERT_TRUE(CanEvictReplica(config, "A", 1, &uuid_to_evict));
     EXPECT_EQ("B", uuid_to_evict);
   }
   {
@@ -566,9 +566,9 @@ TEST(QuorumUtilTest, CanEvictReplicaNonVoters) {
     AddPeer(&config, "A", V, '+');
     AddPeer(&config, "B", N, '-');
     AddPeer(&config, "C", N);
-    EXPECT_FALSE(CanEvictReplica(config, 2));
+    EXPECT_FALSE(CanEvictReplica(config, "A", 2));
     string uuid_to_evict;
-    ASSERT_TRUE(CanEvictReplica(config, 1, &uuid_to_evict));
+    ASSERT_TRUE(CanEvictReplica(config, "A", 1, &uuid_to_evict));
     EXPECT_EQ("B", uuid_to_evict);
   }
   {
@@ -576,9 +576,9 @@ TEST(QuorumUtilTest, CanEvictReplicaNonVoters) {
     AddPeer(&config, "A", V, '+');
     AddPeer(&config, "B", N, '?');
     AddPeer(&config, "C", N, '+');
-    EXPECT_FALSE(CanEvictReplica(config, 2));
+    EXPECT_FALSE(CanEvictReplica(config, "A", 2));
     string uuid_to_evict;
-    ASSERT_TRUE(CanEvictReplica(config, 1, &uuid_to_evict));
+    ASSERT_TRUE(CanEvictReplica(config, "A", 1, &uuid_to_evict));
     EXPECT_EQ("B", uuid_to_evict);
   }
   {
@@ -586,9 +586,9 @@ TEST(QuorumUtilTest, CanEvictReplicaNonVoters) {
     AddPeer(&config, "A", V, '+');
     AddPeer(&config, "B", V);
     AddPeer(&config, "C", N);
-    EXPECT_FALSE(CanEvictReplica(config, 2));
+    EXPECT_FALSE(CanEvictReplica(config, "A", 2));
     string uuid_to_evict;
-    ASSERT_TRUE(CanEvictReplica(config, 1, &uuid_to_evict));
+    ASSERT_TRUE(CanEvictReplica(config, "A", 1, &uuid_to_evict));
     EXPECT_EQ("C", uuid_to_evict);
   }
   {
@@ -596,17 +596,17 @@ TEST(QuorumUtilTest, CanEvictReplicaNonVoters) {
     AddPeer(&config, "A", V, '-');
     AddPeer(&config, "B", V);
     AddPeer(&config, "C", N);
-    EXPECT_FALSE(CanEvictReplica(config, 2));
-    EXPECT_FALSE(CanEvictReplica(config, 1));
+    EXPECT_FALSE(CanEvictReplica(config, "", 2));
+    EXPECT_FALSE(CanEvictReplica(config, "", 1));
   }
   {
     RaftConfigPB config;
     AddPeer(&config, "A", V, '+');
     AddPeer(&config, "B", V, '-');
     AddPeer(&config, "C", N, '-', {{"PROMOTE", true}});
-    EXPECT_FALSE(CanEvictReplica(config, 2));
+    EXPECT_FALSE(CanEvictReplica(config, "A", 2));
     string uuid_to_evict;
-    ASSERT_TRUE(CanEvictReplica(config, 1, &uuid_to_evict));
+    ASSERT_TRUE(CanEvictReplica(config, "A", 1, &uuid_to_evict));
   }
   {
     RaftConfigPB config;
@@ -614,12 +614,28 @@ TEST(QuorumUtilTest, CanEvictReplicaNonVoters) {
     AddPeer(&config, "B", V, '+');
     AddPeer(&config, "C", V, '+');
     AddPeer(&config, "D", N, '+', {{"PROMOTE", true}});
-    EXPECT_FALSE(CanEvictReplica(config, 3));
+    EXPECT_FALSE(CanEvictReplica(config, "B", 3));
     string uuid_to_evict;
-    ASSERT_TRUE(CanEvictReplica(config, 2, &uuid_to_evict));
+    ASSERT_TRUE(CanEvictReplica(config, "B", 2, &uuid_to_evict));
     EXPECT_EQ("D", uuid_to_evict);
   }
 }
 
+// Exhaustively loop through all nodes, each as leader, when over-replicated
+// and ensure that the leader never gets evicted.
+TEST(QuorumUtilTest, TestDontEvictLeader) {
+  const vector<string> nodes = { "A", "B", "C", "D" };
+  RaftConfigPB config;
+  AddPeer(&config, nodes[0], V, '+');
+  AddPeer(&config, nodes[1], V, '+');
+  AddPeer(&config, nodes[2], V, '+');
+  AddPeer(&config, nodes[3], V, '+');
+  for (const auto& leader_node : nodes) {
+    string to_evict;
+    ASSERT_TRUE(CanEvictReplica(config, leader_node, 3, &to_evict));
+    ASSERT_NE(leader_node, to_evict);
+  }
+}
+
 } // namespace consensus
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/4d44c9fe/src/kudu/consensus/quorum_util.cc
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/quorum_util.cc b/src/kudu/consensus/quorum_util.cc
index 8632e3c..c544256 100644
--- a/src/kudu/consensus/quorum_util.cc
+++ b/src/kudu/consensus/quorum_util.cc
@@ -439,8 +439,12 @@ bool IsUnderReplicated(const RaftConfigPB& config, int replication_factor) {
 
 // Whether there is an excess replica to evict.
 bool CanEvictReplica(const RaftConfigPB& config,
+                     const string& leader_uuid,
                      int replication_factor,
                      string* uuid_to_evict) {
+  // If there is no leader, we can't evict anybody.
+  if (leader_uuid.empty()) return false;
+
   int num_non_voters_total = 0;
 
   int num_voters_healthy = 0;
@@ -475,6 +479,13 @@ bool CanEvictReplica(const RaftConfigPB& config,
         if (healthy && !has_replace) {
           ++num_voters_healthy;
         }
+        // Everything below here is to check for replicas to evict.
+        if (peer_uuid == leader_uuid) {
+          // A leader should always report itself as being healthy.
+          DCHECK(healthy) << peer_uuid << ": " << SecureShortDebugString(config);
+          break;
+        }
+
         if (failed && has_replace) {
           voter_failed = voter_replace = peer_uuid;
         }
@@ -492,6 +503,8 @@ bool CanEvictReplica(const RaftConfigPB& config,
 
       case RaftPeerPB::NON_VOTER:
         ++num_non_voters_total;
+        DCHECK_NE(peer_uuid, leader_uuid) << peer_uuid
+            << ": non-voter as a leader; " << SecureShortDebugString(config);
         if (failed && non_voter_failed.empty()) {
           non_voter_failed = peer_uuid;
         }

http://git-wip-us.apache.org/repos/asf/kudu/blob/4d44c9fe/src/kudu/consensus/quorum_util.h
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/quorum_util.h b/src/kudu/consensus/quorum_util.h
index b0d12ae..9afe13f 100644
--- a/src/kudu/consensus/quorum_util.h
+++ b/src/kudu/consensus/quorum_util.h
@@ -94,11 +94,13 @@ bool IsUnderReplicated(const RaftConfigPB& config, int replication_factor);
 
 // Check if the given Raft configuration contains at least one extra replica
 // which can be removed in accordance with the specified replication
-// factor. If so, then return 'true' and set the UUID of the best suited
-// replica into the 'uuid_to_evict' out parameter. Otherwise, return 'false'.
+// factor and current Raft leader. If so, then return 'true' and set the UUID
+// of the best suited replica into the 'uuid_to_evict' out parameter. Otherwise,
+// return 'false'.
 bool CanEvictReplica(const RaftConfigPB& config,
-                         int replication_factor,
-                         std::string* uuid_to_evict = nullptr);
+                     const std::string& leader_uuid,
+                     int replication_factor,
+                     std::string* uuid_to_evict = nullptr);
 
 }  // namespace consensus
 }  // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/4d44c9fe/src/kudu/master/catalog_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc
index 8144d74..abfed16 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -3509,7 +3509,8 @@ Status CatalogManager::ProcessTabletReport(
         const RaftConfigPB& config = cstate.committed_config();
         string to_evict;
         if (PREDICT_TRUE(FLAGS_catalog_manager_evict_excess_replicas) &&
-                         CanEvictReplica(config, replication_factor, &to_evict)) {
+                         CanEvictReplica(config, cstate.leader_uuid(), replication_factor,
+                                         &to_evict)) {
           DCHECK(!to_evict.empty());
           rpcs.emplace_back(new AsyncEvictReplicaTask(
               master_, tablet, cstate, std::move(to_evict)));