You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by mp...@apache.org on 2017/12/02 01:50:41 UTC
[3/4] kudu git commit: KUDU-1097. Never evict a leader
KUDU-1097. Never evict a leader
The master should never try to evict the leader replica when the tablet
is over-replicated and arbitrary voters are eligible for eviction. This
patch ensures that is the case.
Change-Id: I57adc8c4c4a6a3b5ad0c0b9202407a4dd9e1e8b0
Reviewed-on: http://gerrit.cloudera.org:8080/8680
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin <as...@cloudera.com>
Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/4d44c9fe
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/4d44c9fe
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/4d44c9fe
Branch: refs/heads/master
Commit: 4d44c9fe756d92997f71e5d8f2a25443afef0045
Parents: d1d3560
Author: Mike Percy <mp...@apache.org>
Authored: Wed Nov 29 02:56:58 2017 -0800
Committer: Mike Percy <mp...@apache.org>
Committed: Sat Dec 2 01:45:37 2017 +0000
----------------------------------------------------------------------
src/kudu/consensus/quorum_util-test.cc | 102 ++++++++++++++++------------
src/kudu/consensus/quorum_util.cc | 13 ++++
src/kudu/consensus/quorum_util.h | 10 +--
src/kudu/master/catalog_manager.cc | 3 +-
4 files changed, 80 insertions(+), 48 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kudu/blob/4d44c9fe/src/kudu/consensus/quorum_util-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/quorum_util-test.cc b/src/kudu/consensus/quorum_util-test.cc
index 461ae7a..cd038fc 100644
--- a/src/kudu/consensus/quorum_util-test.cc
+++ b/src/kudu/consensus/quorum_util-test.cc
@@ -410,16 +410,16 @@ TEST(QuorumUtilTest, CanEvictReplicaVoters) {
AddPeer(&config, "A", V);
AddPeer(&config, "B", V);
AddPeer(&config, "C", V);
- EXPECT_FALSE(CanEvictReplica(config, 3));
- EXPECT_FALSE(CanEvictReplica(config, 2));
+ EXPECT_FALSE(CanEvictReplica(config, "", 3));
+ EXPECT_FALSE(CanEvictReplica(config, "", 2));
}
{
RaftConfigPB config;
AddPeer(&config, "A", V, '?');
AddPeer(&config, "B", V, '?');
AddPeer(&config, "C", V, '-');
- EXPECT_FALSE(CanEvictReplica(config, 3));
- EXPECT_FALSE(CanEvictReplica(config, 2));
+ EXPECT_FALSE(CanEvictReplica(config, "", 3));
+ EXPECT_FALSE(CanEvictReplica(config, "", 2));
}
{
RaftConfigPB config;
@@ -427,18 +427,18 @@ TEST(QuorumUtilTest, CanEvictReplicaVoters) {
AddPeer(&config, "B", V, '-');
AddPeer(&config, "C", V, '+');
string uuid_to_evict;
- ASSERT_TRUE(CanEvictReplica(config, 1, &uuid_to_evict));
+ ASSERT_TRUE(CanEvictReplica(config, "C", 1, &uuid_to_evict));
EXPECT_EQ("B", uuid_to_evict);
- EXPECT_FALSE(CanEvictReplica(config, 3));
- EXPECT_FALSE(CanEvictReplica(config, 2));
+ EXPECT_FALSE(CanEvictReplica(config, "C", 3));
+ EXPECT_FALSE(CanEvictReplica(config, "C", 2));
}
{
RaftConfigPB config;
AddPeer(&config, "A", V, '+', {{"REPLACE", true}});
AddPeer(&config, "B", V);
AddPeer(&config, "C", V);
- EXPECT_FALSE(CanEvictReplica(config, 3));
- EXPECT_FALSE(CanEvictReplica(config, 2));
+ EXPECT_FALSE(CanEvictReplica(config, "A", 3));
+ EXPECT_FALSE(CanEvictReplica(config, "A", 2));
}
{
RaftConfigPB config;
@@ -446,9 +446,9 @@ TEST(QuorumUtilTest, CanEvictReplicaVoters) {
AddPeer(&config, "B", V, '+', {{"REPLACE", false}});
AddPeer(&config, "C", V, '-');
AddPeer(&config, "D", V, '+');
- EXPECT_FALSE(CanEvictReplica(config, 4));
+ EXPECT_FALSE(CanEvictReplica(config, "A", 4));
string uuid_to_evict;
- ASSERT_TRUE(CanEvictReplica(config, 3, &uuid_to_evict));
+ ASSERT_TRUE(CanEvictReplica(config, "A", 3, &uuid_to_evict));
EXPECT_EQ("C", uuid_to_evict);
}
{
@@ -457,9 +457,9 @@ TEST(QuorumUtilTest, CanEvictReplicaVoters) {
AddPeer(&config, "B", V, '?');
AddPeer(&config, "C", V, '+');
AddPeer(&config, "D", V, '+');
- EXPECT_FALSE(CanEvictReplica(config, 4));
+ EXPECT_FALSE(CanEvictReplica(config, "A", 4));
string uuid_to_evict;
- ASSERT_TRUE(CanEvictReplica(config, 3, &uuid_to_evict));
+ ASSERT_TRUE(CanEvictReplica(config, "A", 3, &uuid_to_evict));
EXPECT_EQ("B", uuid_to_evict);
}
{
@@ -468,9 +468,9 @@ TEST(QuorumUtilTest, CanEvictReplicaVoters) {
AddPeer(&config, "B", V, '+');
AddPeer(&config, "C", V, '-');
AddPeer(&config, "D", V, '?', {{"REPLACE", true}});
- EXPECT_FALSE(CanEvictReplica(config, 3));
+ EXPECT_FALSE(CanEvictReplica(config, "A", 3));
string uuid_to_evict;
- ASSERT_TRUE(CanEvictReplica(config, 2, &uuid_to_evict));
+ ASSERT_TRUE(CanEvictReplica(config, "A", 2, &uuid_to_evict));
EXPECT_EQ("D", uuid_to_evict);
}
{
@@ -479,9 +479,9 @@ TEST(QuorumUtilTest, CanEvictReplicaVoters) {
AddPeer(&config, "B", V, '+');
AddPeer(&config, "C", V, '-');
AddPeer(&config, "D", V, '-', {{"REPLACE", true}});
- EXPECT_FALSE(CanEvictReplica(config, 3));
+ EXPECT_FALSE(CanEvictReplica(config, "A", 3));
string uuid_to_evict;
- ASSERT_TRUE(CanEvictReplica(config, 2, &uuid_to_evict));
+ ASSERT_TRUE(CanEvictReplica(config, "A", 2, &uuid_to_evict));
EXPECT_EQ("D", uuid_to_evict);
}
{
@@ -490,9 +490,9 @@ TEST(QuorumUtilTest, CanEvictReplicaVoters) {
AddPeer(&config, "B", V, '+');
AddPeer(&config, "C", V, '-');
AddPeer(&config, "D", V, '+', {{"REPLACE", true}});
- EXPECT_FALSE(CanEvictReplica(config, 3));
+ EXPECT_FALSE(CanEvictReplica(config, "A", 3));
string uuid_to_evict;
- ASSERT_TRUE(CanEvictReplica(config, 2, &uuid_to_evict));
+ ASSERT_TRUE(CanEvictReplica(config, "A", 2, &uuid_to_evict));
EXPECT_EQ("D", uuid_to_evict);
}
{
@@ -501,9 +501,9 @@ TEST(QuorumUtilTest, CanEvictReplicaVoters) {
AddPeer(&config, "B", V, '+');
AddPeer(&config, "C", V, '?');
AddPeer(&config, "D", V, '+', {{"REPLACE", true}});
- EXPECT_FALSE(CanEvictReplica(config, 3));
+ EXPECT_FALSE(CanEvictReplica(config, "A", 3));
string uuid_to_evict;
- ASSERT_TRUE(CanEvictReplica(config, 2, &uuid_to_evict));
+ ASSERT_TRUE(CanEvictReplica(config, "A", 2, &uuid_to_evict));
EXPECT_EQ("D", uuid_to_evict);
}
{
@@ -512,9 +512,9 @@ TEST(QuorumUtilTest, CanEvictReplicaVoters) {
AddPeer(&config, "B", V, '?');
AddPeer(&config, "C", V, '+');
AddPeer(&config, "D", V, '+');
- EXPECT_FALSE(CanEvictReplica(config, 4));
+ EXPECT_FALSE(CanEvictReplica(config, "A", 4));
string uuid_to_evict;
- ASSERT_TRUE(CanEvictReplica(config, 3, &uuid_to_evict));
+ ASSERT_TRUE(CanEvictReplica(config, "A", 3, &uuid_to_evict));
EXPECT_EQ("B", uuid_to_evict);
}
}
@@ -525,20 +525,20 @@ TEST(QuorumUtilTest, CanEvictReplicaNonVoters) {
{
RaftConfigPB config;
AddPeer(&config, "A", V);
- EXPECT_FALSE(CanEvictReplica(config, 1));
+ EXPECT_FALSE(CanEvictReplica(config, "", 1));
}
{
RaftConfigPB config;
AddPeer(&config, "A", V, '+');
- EXPECT_FALSE(CanEvictReplica(config, 1));
+ EXPECT_FALSE(CanEvictReplica(config, "A", 1));
}
{
RaftConfigPB config;
AddPeer(&config, "A", V, '+');
AddPeer(&config, "B", N);
- EXPECT_FALSE(CanEvictReplica(config, 2));
+ EXPECT_FALSE(CanEvictReplica(config, "A", 2));
string uuid_to_evict;
- ASSERT_TRUE(CanEvictReplica(config, 1, &uuid_to_evict));
+ ASSERT_TRUE(CanEvictReplica(config, "A", 1, &uuid_to_evict));
EXPECT_EQ("B", uuid_to_evict);
}
{
@@ -547,18 +547,18 @@ TEST(QuorumUtilTest, CanEvictReplicaNonVoters) {
AddPeer(&config, "B", V, '+');
AddPeer(&config, "C", N, '+');
string uuid_to_evict;
- ASSERT_TRUE(CanEvictReplica(config, 2, &uuid_to_evict));
+ ASSERT_TRUE(CanEvictReplica(config, "A", 2, &uuid_to_evict));
EXPECT_EQ("C", uuid_to_evict);
- ASSERT_TRUE(CanEvictReplica(config, 1, &uuid_to_evict));
+ ASSERT_TRUE(CanEvictReplica(config, "A", 1, &uuid_to_evict));
EXPECT_EQ("C", uuid_to_evict);
}
{
RaftConfigPB config;
AddPeer(&config, "A", V, '+');
AddPeer(&config, "B", N, '-', {{"PROMOTE", true}});
- EXPECT_FALSE(CanEvictReplica(config, 2));
+ EXPECT_FALSE(CanEvictReplica(config, "A", 2));
string uuid_to_evict;
- ASSERT_TRUE(CanEvictReplica(config, 1, &uuid_to_evict));
+ ASSERT_TRUE(CanEvictReplica(config, "A", 1, &uuid_to_evict));
EXPECT_EQ("B", uuid_to_evict);
}
{
@@ -566,9 +566,9 @@ TEST(QuorumUtilTest, CanEvictReplicaNonVoters) {
AddPeer(&config, "A", V, '+');
AddPeer(&config, "B", N, '-');
AddPeer(&config, "C", N);
- EXPECT_FALSE(CanEvictReplica(config, 2));
+ EXPECT_FALSE(CanEvictReplica(config, "A", 2));
string uuid_to_evict;
- ASSERT_TRUE(CanEvictReplica(config, 1, &uuid_to_evict));
+ ASSERT_TRUE(CanEvictReplica(config, "A", 1, &uuid_to_evict));
EXPECT_EQ("B", uuid_to_evict);
}
{
@@ -576,9 +576,9 @@ TEST(QuorumUtilTest, CanEvictReplicaNonVoters) {
AddPeer(&config, "A", V, '+');
AddPeer(&config, "B", N, '?');
AddPeer(&config, "C", N, '+');
- EXPECT_FALSE(CanEvictReplica(config, 2));
+ EXPECT_FALSE(CanEvictReplica(config, "A", 2));
string uuid_to_evict;
- ASSERT_TRUE(CanEvictReplica(config, 1, &uuid_to_evict));
+ ASSERT_TRUE(CanEvictReplica(config, "A", 1, &uuid_to_evict));
EXPECT_EQ("B", uuid_to_evict);
}
{
@@ -586,9 +586,9 @@ TEST(QuorumUtilTest, CanEvictReplicaNonVoters) {
AddPeer(&config, "A", V, '+');
AddPeer(&config, "B", V);
AddPeer(&config, "C", N);
- EXPECT_FALSE(CanEvictReplica(config, 2));
+ EXPECT_FALSE(CanEvictReplica(config, "A", 2));
string uuid_to_evict;
- ASSERT_TRUE(CanEvictReplica(config, 1, &uuid_to_evict));
+ ASSERT_TRUE(CanEvictReplica(config, "A", 1, &uuid_to_evict));
EXPECT_EQ("C", uuid_to_evict);
}
{
@@ -596,17 +596,17 @@ TEST(QuorumUtilTest, CanEvictReplicaNonVoters) {
AddPeer(&config, "A", V, '-');
AddPeer(&config, "B", V);
AddPeer(&config, "C", N);
- EXPECT_FALSE(CanEvictReplica(config, 2));
- EXPECT_FALSE(CanEvictReplica(config, 1));
+ EXPECT_FALSE(CanEvictReplica(config, "", 2));
+ EXPECT_FALSE(CanEvictReplica(config, "", 1));
}
{
RaftConfigPB config;
AddPeer(&config, "A", V, '+');
AddPeer(&config, "B", V, '-');
AddPeer(&config, "C", N, '-', {{"PROMOTE", true}});
- EXPECT_FALSE(CanEvictReplica(config, 2));
+ EXPECT_FALSE(CanEvictReplica(config, "A", 2));
string uuid_to_evict;
- ASSERT_TRUE(CanEvictReplica(config, 1, &uuid_to_evict));
+ ASSERT_TRUE(CanEvictReplica(config, "A", 1, &uuid_to_evict));
}
{
RaftConfigPB config;
@@ -614,12 +614,28 @@ TEST(QuorumUtilTest, CanEvictReplicaNonVoters) {
AddPeer(&config, "B", V, '+');
AddPeer(&config, "C", V, '+');
AddPeer(&config, "D", N, '+', {{"PROMOTE", true}});
- EXPECT_FALSE(CanEvictReplica(config, 3));
+ EXPECT_FALSE(CanEvictReplica(config, "B", 3));
string uuid_to_evict;
- ASSERT_TRUE(CanEvictReplica(config, 2, &uuid_to_evict));
+ ASSERT_TRUE(CanEvictReplica(config, "B", 2, &uuid_to_evict));
EXPECT_EQ("D", uuid_to_evict);
}
}
+// Exhaustively loop through all nodes, each as leader, when over-replicated
+// and ensure that the leader never gets evicted.
+TEST(QuorumUtilTest, TestDontEvictLeader) {
+ const vector<string> nodes = { "A", "B", "C", "D" };
+ RaftConfigPB config;
+ AddPeer(&config, nodes[0], V, '+');
+ AddPeer(&config, nodes[1], V, '+');
+ AddPeer(&config, nodes[2], V, '+');
+ AddPeer(&config, nodes[3], V, '+');
+ for (const auto& leader_node : nodes) {
+ string to_evict;
+ ASSERT_TRUE(CanEvictReplica(config, leader_node, 3, &to_evict));
+ ASSERT_NE(leader_node, to_evict);
+ }
+}
+
} // namespace consensus
} // namespace kudu
http://git-wip-us.apache.org/repos/asf/kudu/blob/4d44c9fe/src/kudu/consensus/quorum_util.cc
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/quorum_util.cc b/src/kudu/consensus/quorum_util.cc
index 8632e3c..c544256 100644
--- a/src/kudu/consensus/quorum_util.cc
+++ b/src/kudu/consensus/quorum_util.cc
@@ -439,8 +439,12 @@ bool IsUnderReplicated(const RaftConfigPB& config, int replication_factor) {
// Whether there is an excess replica to evict.
bool CanEvictReplica(const RaftConfigPB& config,
+ const string& leader_uuid,
int replication_factor,
string* uuid_to_evict) {
+ // If there is no leader, we can't evict anybody.
+ if (leader_uuid.empty()) return false;
+
int num_non_voters_total = 0;
int num_voters_healthy = 0;
@@ -475,6 +479,13 @@ bool CanEvictReplica(const RaftConfigPB& config,
if (healthy && !has_replace) {
++num_voters_healthy;
}
+ // Everything below here is to check for replicas to evict.
+ if (peer_uuid == leader_uuid) {
+ // A leader should always report itself as being healthy.
+ DCHECK(healthy) << peer_uuid << ": " << SecureShortDebugString(config);
+ break;
+ }
+
if (failed && has_replace) {
voter_failed = voter_replace = peer_uuid;
}
@@ -492,6 +503,8 @@ bool CanEvictReplica(const RaftConfigPB& config,
case RaftPeerPB::NON_VOTER:
++num_non_voters_total;
+ DCHECK_NE(peer_uuid, leader_uuid) << peer_uuid
+ << ": non-voter as a leader; " << SecureShortDebugString(config);
if (failed && non_voter_failed.empty()) {
non_voter_failed = peer_uuid;
}
http://git-wip-us.apache.org/repos/asf/kudu/blob/4d44c9fe/src/kudu/consensus/quorum_util.h
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/quorum_util.h b/src/kudu/consensus/quorum_util.h
index b0d12ae..9afe13f 100644
--- a/src/kudu/consensus/quorum_util.h
+++ b/src/kudu/consensus/quorum_util.h
@@ -94,11 +94,13 @@ bool IsUnderReplicated(const RaftConfigPB& config, int replication_factor);
// Check if the given Raft configuration contains at least one extra replica
// which can be removed in accordance with the specified replication
-// factor. If so, then return 'true' and set the UUID of the best suited
-// replica into the 'uuid_to_evict' out parameter. Otherwise, return 'false'.
+// factor and current Raft leader. If so, then return 'true' and set the UUID
+// of the best suited replica into the 'uuid_to_evict' out parameter. Otherwise,
+// return 'false'.
bool CanEvictReplica(const RaftConfigPB& config,
- int replication_factor,
- std::string* uuid_to_evict = nullptr);
+ const std::string& leader_uuid,
+ int replication_factor,
+ std::string* uuid_to_evict = nullptr);
} // namespace consensus
} // namespace kudu
http://git-wip-us.apache.org/repos/asf/kudu/blob/4d44c9fe/src/kudu/master/catalog_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc
index 8144d74..abfed16 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -3509,7 +3509,8 @@ Status CatalogManager::ProcessTabletReport(
const RaftConfigPB& config = cstate.committed_config();
string to_evict;
if (PREDICT_TRUE(FLAGS_catalog_manager_evict_excess_replicas) &&
- CanEvictReplica(config, replication_factor, &to_evict)) {
+ CanEvictReplica(config, cstate.leader_uuid(), replication_factor,
+ &to_evict)) {
DCHECK(!to_evict.empty());
rpcs.emplace_back(new AsyncEvictReplicaTask(
master_, tablet, cstate, std::move(to_evict)));