Posted to commits@kudu.apache.org by al...@apache.org on 2018/07/11 20:37:14 UTC
kudu git commit: [quorum_util-test] scenario for node decommissioning
Repository: kudu
Updated Branches:
refs/heads/master 077fe5762 -> 4eefc8d4e
[quorum_util-test] scenario for node decommissioning
Added an additional unit test and a scenario to ensure the current
implementation of the 3-4-3 replica management scheme handles
decommissioning of multiple tablet servers (or nodes) as expected.
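For readers unfamiliar with the test harness, the condensed sketch below (not
part of this commit) shows a single 3-4-3 replacement cycle using the same
helpers the new test exercises in the diff that follows: AddPeer(),
SetPeerHealth(), PromotePeer(), RemovePeer(), ShouldEvictReplica() and
ShouldAddReplica(). The 'V'/'N' member-type and '+'/'?' health shorthands are
assumed to follow the conventions already defined in quorum_util-test.cc.

  RaftConfigPB config;
  // All voter replicas carry the REPLACE attribute, as happens when the
  // tablet servers hosting them are being decommissioned.
  AddPeer(&config, "A", V, '+', {{"REPLACE", true}});
  AddPeer(&config, "B", V, '+', {{"REPLACE", true}});
  AddPeer(&config, "C", V, '+', {{"REPLACE", true}});

  // 3-4-3: a replacement non-voter is added before any voter is evicted.
  EXPECT_TRUE(ShouldAddReplica(config, 3, MajorityHealthPolicy::HONOR));
  AddPeer(&config, "D", N, '?', {{"PROMOTE", true}});

  // Once the non-voter is healthy and has caught up, it is promoted to a
  // voter; only then does one of the REPLACE-marked voters become evictable.
  SetPeerHealth(&config, "D", '+');
  PromotePeer(&config, "D");
  string to_evict;
  ASSERT_TRUE(ShouldEvictReplica(config, "A", 3, MajorityHealthPolicy::HONOR,
                                 &to_evict));
  RemovePeer(&config, to_evict);

The new test below repeats this cycle until every originally marked replica is
gone, interleaving a failed non-voter ('F') and a leadership change along the
way.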
Change-Id: I46e0946c474095095e9a8ccd84580e945dd4921b
Reviewed-on: http://gerrit.cloudera.org:8080/10889
Reviewed-by: Alexey Serbin <as...@cloudera.com>
Tested-by: Kudu Jenkins
Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/4eefc8d4
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/4eefc8d4
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/4eefc8d4
Branch: refs/heads/master
Commit: 4eefc8d4e362806957cf92de12e1c456a8b01a7d
Parents: 077fe57
Author: Alexey Serbin <as...@cloudera.com>
Authored: Fri Jul 6 20:19:53 2018 -0700
Committer: Alexey Serbin <as...@cloudera.com>
Committed: Wed Jul 11 20:36:22 2018 +0000
----------------------------------------------------------------------
src/kudu/consensus/quorum_util-test.cc | 120 ++++++++++++++++++++++++++++
1 file changed, 120 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kudu/blob/4eefc8d4/src/kudu/consensus/quorum_util-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/quorum_util-test.cc b/src/kudu/consensus/quorum_util-test.cc
index 46776e0..a32e368 100644
--- a/src/kudu/consensus/quorum_util-test.cc
+++ b/src/kudu/consensus/quorum_util-test.cc
@@ -1490,6 +1490,23 @@ TEST_P(QuorumUtilHealthPolicyParamTest, MultipleReplicasWithReplaceAttribute) {
}
EXPECT_FALSE(ShouldAddReplica(config, 3, policy));
}
+ {
+ RaftConfigPB config;
+ AddPeer(&config, "A", V, '+', {{"REPLACE", true}});
+ AddPeer(&config, "B", V, '+', {{"REPLACE", true}});
+ AddPeer(&config, "C", V, '+', {{"REPLACE", true}});
+ AddPeer(&config, "D", N, '+', {{"PROMOTE", true}});
+ AddPeer(&config, "E", N, '+', {{"PROMOTE", true}});
+ AddPeer(&config, "F", N, '+', {{"PROMOTE", true}});
+
+ for (const string& leader_replica : { "A", "B", "C" }) {
+ // All non-voters are in good shape and not a single one has been
+ // promoted yet.
+ ASSERT_FALSE(ShouldEvictReplica(config, leader_replica, 3, policy));
+ }
+ // No more replicas are needed for the replacement.
+ EXPECT_FALSE(ShouldAddReplica(config, 3, policy));
+ }
}
// Verify logic of the kudu::consensus::ShouldEvictReplica(), anticipating
@@ -1777,5 +1794,108 @@ TEST(QuorumUtilTest, ReplicaHealthFlapping) {
EXPECT_FALSE(ShouldAddReplica(config, kReplicationFactor, kPolicy));
}
+// A scenario to simulate the process of migrating all replicas of a tablet,
+// where all replicas are marked for replacement simultaneously. This is a
+// possible scenario when decommissioning multiple tablet servers/nodes at once.
+TEST(QuorumUtilTest, ReplaceAllTabletReplicas) {
+ constexpr auto kReplicationFactor = 3;
+ constexpr auto kPolicy = MajorityHealthPolicy::HONOR;
+
+ // The initial tablet report after the tablet replica 'A' has started and
+ // become the leader.
+ RaftConfigPB config;
+ AddPeer(&config, "A", V, '+', {{"REPLACE", true}});
+ AddPeer(&config, "B", V, '+', {{"REPLACE", true}});
+ AddPeer(&config, "C", V, '+', {{"REPLACE", true}});
+ EXPECT_FALSE(ShouldEvictReplica(config, "A", kReplicationFactor, kPolicy));
+ EXPECT_TRUE(ShouldAddReplica(config, kReplicationFactor, kPolicy));
+
+ // First non-voter replica added.
+ AddPeer(&config, "D", N, '?', {{"PROMOTE", true}});
+ EXPECT_FALSE(ShouldEvictReplica(config, "A", kReplicationFactor, kPolicy));
+ EXPECT_TRUE(ShouldAddReplica(config, kReplicationFactor, kPolicy));
+
+ // Second non-voter replica added.
+ AddPeer(&config, "E", N, '?', {{"PROMOTE", true}});
+ EXPECT_FALSE(ShouldEvictReplica(config, "A", kReplicationFactor, kPolicy));
+ EXPECT_TRUE(ShouldAddReplica(config, kReplicationFactor, kPolicy));
+
+ // Third non-voter replica added.
+ AddPeer(&config, "F", N, '?', {{"PROMOTE", true}});
+ EXPECT_FALSE(ShouldEvictReplica(config, "A", kReplicationFactor, kPolicy));
+ EXPECT_FALSE(ShouldAddReplica(config, kReplicationFactor, kPolicy));
+
+ SetPeerHealth(&config, "D", '+');
+ EXPECT_FALSE(ShouldEvictReplica(config, "A", kReplicationFactor, kPolicy));
+ EXPECT_FALSE(ShouldAddReplica(config, kReplicationFactor, kPolicy));
+
+ // Replica 'D' catches up with the leader's WAL and gets promoted.
+ PromotePeer(&config, "D");
+ string to_evict;
+ ASSERT_TRUE(ShouldEvictReplica(config, "A", kReplicationFactor, kPolicy, &to_evict));
+ EXPECT_TRUE(to_evict == "B" || to_evict == "C");
+ EXPECT_FALSE(ShouldAddReplica(config, kReplicationFactor, kPolicy));
+
+ // Evicting the replica selected by ShouldEvictReplica() above.
+ RemovePeer(&config, to_evict);
+ EXPECT_FALSE(ShouldEvictReplica(config, "A", kReplicationFactor, kPolicy));
+ EXPECT_FALSE(ShouldAddReplica(config, kReplicationFactor, kPolicy));
+
+ // Non-voter replica 'F' becomes unavailable.
+ SetPeerHealth(&config, "F", '-');
+ ASSERT_TRUE(ShouldEvictReplica(config, "A", kReplicationFactor, kPolicy, &to_evict));
+ ASSERT_EQ("F", to_evict);
+ EXPECT_TRUE(ShouldAddReplica(config, kReplicationFactor, kPolicy));
+
+ // Evicting the failed non-voter replica selected by ShouldEvictReplica() above.
+ RemovePeer(&config, to_evict);
+ EXPECT_FALSE(ShouldEvictReplica(config, "A", kReplicationFactor, kPolicy));
+ EXPECT_TRUE(ShouldAddReplica(config, kReplicationFactor, kPolicy));
+
+ // Adding a new non-voter replica.
+ AddPeer(&config, "G", N, '?', {{"PROMOTE", true}});
+ EXPECT_FALSE(ShouldEvictReplica(config, "A", kReplicationFactor, kPolicy));
+ EXPECT_FALSE(ShouldAddReplica(config, kReplicationFactor, kPolicy));
+
+ // The newly added non-voter replica 'G' is in good shape.
+ SetPeerHealth(&config, "G", '+');
+ EXPECT_FALSE(ShouldEvictReplica(config, "A", kReplicationFactor, kPolicy));
+ EXPECT_FALSE(ShouldAddReplica(config, kReplicationFactor, kPolicy));
+
+ // Replica 'E' is reported in good health.
+ SetPeerHealth(&config, "E", '+');
+ EXPECT_FALSE(ShouldEvictReplica(config, "A", kReplicationFactor, kPolicy));
+ EXPECT_FALSE(ShouldAddReplica(config, kReplicationFactor, kPolicy));
+
+ // Replica 'E' catches up with the leader's WAL and gets promoted.
+ PromotePeer(&config, "E");
+ ASSERT_TRUE(ShouldEvictReplica(config, "A", kReplicationFactor, kPolicy, &to_evict));
+ EXPECT_TRUE(to_evict == "B" || to_evict == "C");
+ EXPECT_FALSE(ShouldAddReplica(config, kReplicationFactor, kPolicy));
+
+ // Evicting the replica selected by ShouldEvictReplica() above.
+ RemovePeer(&config, to_evict);
+ EXPECT_FALSE(ShouldEvictReplica(config, "A", kReplicationFactor, kPolicy));
+ EXPECT_FALSE(ShouldAddReplica(config, kReplicationFactor, kPolicy));
+
+ // Replica 'G' catches up, but replica 'A' cannot yet be evicted since it's
+ // a leader replica.
+ PromotePeer(&config, "G");
+ EXPECT_FALSE(ShouldEvictReplica(config, "A", kReplicationFactor, kPolicy));
+ EXPECT_FALSE(ShouldAddReplica(config, kReplicationFactor, kPolicy));
+
+ // Leadership changes from 'A' to 'G', so now it's possible to evict 'A'.
+ ASSERT_TRUE(ShouldEvictReplica(config, "G", kReplicationFactor, kPolicy, &to_evict));
+ ASSERT_EQ("A", to_evict);
+ EXPECT_FALSE(ShouldAddReplica(config, kReplicationFactor, kPolicy));
+
+ // Evicting the replica selected by ShouldEvictReplica() above. With that,
+ // the replacement process of all the marked replicas is complete; no further
+ // changes are necessary for the tablet's Raft configuration.
+ RemovePeer(&config, to_evict);
+ EXPECT_FALSE(ShouldEvictReplica(config, "G", kReplicationFactor, kPolicy));
+ EXPECT_FALSE(ShouldAddReplica(config, kReplicationFactor, kPolicy));
+}
+
} // namespace consensus
} // namespace kudu