You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2017/10/24 20:06:35 UTC
kudu git commit: consensus: support changing between VOTER and
NON_VOTER
Repository: kudu
Updated Branches:
refs/heads/master e938aaa21 -> 28a671365
consensus: support changing between VOTER and NON_VOTER
This patch implements changing from VOTER to NON_VOTER and vice-versa.
Added an integration test scenario for NON_VOTER --> VOTER,
VOTER --> NON_VOTER changes and other use cases.
Change-Id: I978cbed683b0e95a903f2bd2b57496aee71cb33a
Reviewed-on: http://gerrit.cloudera.org:8080/8297
Tested-by: Kudu Jenkins
Reviewed-by: Todd Lipcon <to...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/28a67136
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/28a67136
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/28a67136
Branch: refs/heads/master
Commit: 28a671365d6d38da966481daf937b3776e3d4852
Parents: e938aaa
Author: Mike Percy <mp...@apache.org>
Authored: Wed Oct 18 01:12:26 2017 +0900
Committer: Todd Lipcon <to...@apache.org>
Committed: Tue Oct 24 18:16:36 2017 +0000
----------------------------------------------------------------------
src/kudu/consensus/quorum_util-test.cc | 10 +
src/kudu/consensus/quorum_util.cc | 6 +
src/kudu/consensus/quorum_util.h | 4 +
src/kudu/consensus/raft_consensus.cc | 61 ++++-
src/kudu/consensus/raft_consensus.h | 8 +
.../integration-tests/cluster_itest_util.cc | 35 ++-
src/kudu/integration-tests/cluster_itest_util.h | 10 +
.../raft_consensus_nonvoter-itest.cc | 225 ++++++++++++++++++-
8 files changed, 345 insertions(+), 14 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kudu/blob/28a67136/src/kudu/consensus/quorum_util-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/quorum_util-test.cc b/src/kudu/consensus/quorum_util-test.cc
index 4b2ad0c..8864977 100644
--- a/src/kudu/consensus/quorum_util-test.cc
+++ b/src/kudu/consensus/quorum_util-test.cc
@@ -179,6 +179,16 @@ TEST(QuorumUtilTest, TestIsRaftConfigVoter) {
ASSERT_FALSE(IsRaftConfigVoter("B", config));
ASSERT_FALSE(IsRaftConfigVoter("C", config));
ASSERT_FALSE(IsRaftConfigVoter(no_member_type_peer_uuid, config));
+
+ RaftPeerPB* peer_a;
+ ASSERT_OK(GetRaftConfigMember(&config, "A", &peer_a));
+ RaftPeerPB* peer_b;
+ ASSERT_OK(GetRaftConfigMember(&config, "B", &peer_b));
+ ASSERT_FALSE(ReplicaTypesEqual(*peer_a, *peer_b));
+ ASSERT_TRUE(ReplicaTypesEqual(*peer_b, *peer_b));
+ RaftPeerPB* peer_c;
+ ASSERT_OK(GetRaftConfigMember(&config, "C", &peer_c));
+ ASSERT_FALSE(ReplicaTypesEqual(*peer_b, *peer_c));
}
} // namespace consensus
http://git-wip-us.apache.org/repos/asf/kudu/blob/28a67136/src/kudu/consensus/quorum_util.cc
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/quorum_util.cc b/src/kudu/consensus/quorum_util.cc
index f81f2a7..90d9671 100644
--- a/src/kudu/consensus/quorum_util.cc
+++ b/src/kudu/consensus/quorum_util.cc
@@ -98,6 +98,12 @@ bool RemoveFromRaftConfig(RaftConfigPB* config, const string& uuid) {
return true;
}
+bool ReplicaTypesEqual(const RaftPeerPB& peer1, const RaftPeerPB& peer2) {
+ // TODO(mpercy): Include comparison of replica intentions once they are
+ // implemented.
+ return peer1.member_type() == peer2.member_type();
+}
+
int CountVoters(const RaftConfigPB& config) {
int voters = 0;
for (const RaftPeerPB& peer : config.peers()) {
http://git-wip-us.apache.org/repos/asf/kudu/blob/28a67136/src/kudu/consensus/quorum_util.h
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/quorum_util.h b/src/kudu/consensus/quorum_util.h
index 26b6e78..3bd0afc 100644
--- a/src/kudu/consensus/quorum_util.h
+++ b/src/kudu/consensus/quorum_util.h
@@ -56,6 +56,10 @@ Status GetRaftConfigLeader(ConsensusStatePB* cstate, RaftPeerPB** peer_pb);
// Returns true on success.
bool RemoveFromRaftConfig(RaftConfigPB* config, const std::string& uuid);
+// Returns true iff the two peers have equivalent replica types and associated
+// options.
+bool ReplicaTypesEqual(const RaftPeerPB& peer1, const RaftPeerPB& peer2);
+
// Counts the number of voters in the configuration.
int CountVoters(const RaftConfigPB& config);
http://git-wip-us.apache.org/repos/asf/kudu/blob/28a67136/src/kudu/consensus/raft_consensus.cc
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/raft_consensus.cc b/src/kudu/consensus/raft_consensus.cc
index 581bace..5f7f392 100644
--- a/src/kudu/consensus/raft_consensus.cc
+++ b/src/kudu/consensus/raft_consensus.cc
@@ -316,7 +316,7 @@ Status RaftConsensus::Start(const ConsensusBootstrapInfo& info,
}
}
- // Now assume "follower" duties.
+ // Now assume non-leader replica duties.
RETURN_NOT_OK(BecomeReplicaUnlocked(fd_initial_delta));
SetStateUnlocked(kRunning);
@@ -585,15 +585,9 @@ Status RaftConsensus::BecomeReplicaUnlocked(boost::optional<MonoDelta> fd_delta)
<< ToStringUnlocked();
ClearLeaderUnlocked();
- if (consensus::IsVoterRole(cmeta_->active_role())) {
- // A voter should run failure detector, if not a leader.
- EnableFailureDetector(std::move(fd_delta));
- } else {
- // A non-voter should not start leader elections. The leader failure
- // detector should be re-enabled once the non-voter replica is promoted
- // to voter replica.
- DisableFailureDetector();
- }
+ // Enable/disable leader failure detection if becoming VOTER/NON_VOTER replica
+ // correspondingly.
+ ToggleFailureDetector(std::move(fd_delta));
// Now that we're a replica, we can allow voting for other nodes.
withhold_votes_until_ = MonoTime::Min();
@@ -1648,8 +1642,31 @@ Status RaftConsensus::ChangeConfig(const ChangeConfigRequestPB& req,
// TODO(mpercy): Support changing between VOTER and NON_VOTER.
case CHANGE_REPLICA_TYPE:
+ if (server.member_type() == RaftPeerPB::UNKNOWN_MEMBER_TYPE) {
+ return Status::InvalidArgument("Cannot change replica type to UNKNOWN_MEMBER_TYPE");
+ }
+ if (server_uuid == peer_uuid()) {
+ return Status::InvalidArgument(
+ Substitute("Cannot change the replica type of peer $0 because it is the leader. "
+ "Force another leader to be elected to modify the type of this replica. "
+ "Consensus state: $1",
+ server_uuid,
+ SecureShortDebugString(cmeta_->ToConsensusStatePB())));
+ }
+ RaftPeerPB* peer_pb;
+ RETURN_NOT_OK(GetRaftConfigMember(&new_config, server_uuid, &peer_pb));
+ // Validate the changes.
+ if (ReplicaTypesEqual(*peer_pb, server)) {
+ return Status::InvalidArgument("Cannot change replica type to same type");
+ }
+ peer_pb->set_member_type(server.member_type());
+ // TODO(mpercy): Copy over replica intentions when implemented.
+ break;
+
default:
- return Status::NotSupported("Role change is not yet implemented.");
+ return Status::NotSupported(Substitute(
+ "$0: unsupported type of configuration change",
+ ChangeConfigType_Name(type)));
}
RETURN_NOT_OK(ReplicateConfigChangeUnlocked(
@@ -2401,6 +2418,10 @@ void RaftConsensus::CompleteConfigChangeRoundUnlocked(ConsensusRound* round, con
LOG_WITH_PREFIX_UNLOCKED(INFO) << "Aborting config change with OpId "
<< op_id << ": " << status.ToString();
cmeta_->clear_pending_config();
+
+ // Disable leader failure detection if transitioning from VOTER to
+ // NON_VOTER and vice versa.
+ ToggleFailureDetector();
} else {
LOG_WITH_PREFIX_UNLOCKED(INFO)
<< "Skipping abort of non-pending config change with OpId "
@@ -2448,7 +2469,6 @@ void RaftConsensus::CompleteConfigChangeRoundUnlocked(ConsensusRound* round, con
}
}
-
void RaftConsensus::EnableFailureDetector(boost::optional<MonoDelta> delta) {
if (PREDICT_TRUE(FLAGS_enable_leader_failure_detection)) {
failure_detector_->Start(std::move(delta));
@@ -2461,6 +2481,18 @@ void RaftConsensus::DisableFailureDetector() {
}
}
+void RaftConsensus::ToggleFailureDetector(boost::optional<MonoDelta> delta) {
+ if (IsRaftConfigVoter(peer_uuid(), cmeta_->ActiveConfig())) {
+ // A non-leader voter replica should run failure detector.
+ EnableFailureDetector(std::move(delta));
+ } else {
+ // A non-voter should not start leader elections. The leader failure
+ // detector should be re-enabled once the non-voter replica is promoted
+ // to voter replica.
+ DisableFailureDetector();
+ }
+}
+
void RaftConsensus::SnoozeFailureDetector(boost::optional<string> reason_for_log,
boost::optional<MonoDelta> delta) {
if (PREDICT_TRUE(failure_detector_ && FLAGS_enable_leader_failure_detection)) {
@@ -2583,6 +2615,11 @@ Status RaftConsensus::SetPendingConfigUnlocked(const RaftConfigPB& new_config) {
<< "New pending config: " << SecureShortDebugString(new_config);
}
cmeta_->set_pending_config(new_config);
+
+ // Disable leader failure detection if transitioning from VOTER to NON_VOTER
+ // and vice versa.
+ ToggleFailureDetector();
+
return Status::OK();
}
http://git-wip-us.apache.org/repos/asf/kudu/blob/28a67136/src/kudu/consensus/raft_consensus.h
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/raft_consensus.h b/src/kudu/consensus/raft_consensus.h
index 5ab93b8..7229bb7 100644
--- a/src/kudu/consensus/raft_consensus.h
+++ b/src/kudu/consensus/raft_consensus.h
@@ -563,6 +563,14 @@ class RaftConsensus : public std::enable_shared_from_this<RaftConsensus>,
// If the failure detector is already disabled, has no effect.
void DisableFailureDetector();
+ // Disable leader failure detector if transitioning from VOTER to NON_VOTER,
+ // and vice versa. The decision is based on the type of membership of the
+ // peer in the active Raft configuration.
+ //
+ // If it's time to enable the leader failure detection, the specified
+ // 'delta' value is used as described in EnableFailureDetector()'s comment.
+ void ToggleFailureDetector(boost::optional<MonoDelta> delta = boost::none);
+
// "Reset" the failure detector to indicate leader activity.
//
// When this is called a failure is guaranteed not to be detected before
http://git-wip-us.apache.org/repos/asf/kudu/blob/28a67136/src/kudu/integration-tests/cluster_itest_util.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/cluster_itest_util.cc b/src/kudu/integration-tests/cluster_itest_util.cc
index e956804..cac682d 100644
--- a/src/kudu/integration-tests/cluster_itest_util.cc
+++ b/src/kudu/integration-tests/cluster_itest_util.cc
@@ -738,7 +738,40 @@ Status RemoveServer(const TServerDetails* leader,
rpc.set_timeout(timeout);
RETURN_NOT_OK(leader->consensus_proxy->ChangeConfig(req, &resp, &rpc));
if (resp.has_error()) {
- if (error_code) *error_code = resp.error().code();
+ if (error_code) {
+ *error_code = resp.error().code();
+ }
+ return StatusFromPB(resp.error().status());
+ }
+ return Status::OK();
+}
+
+Status ChangeReplicaType(const TServerDetails* leader,
+ const std::string& tablet_id,
+ const TServerDetails* target_replica,
+ RaftPeerPB::MemberType replica_type,
+ const MonoDelta& timeout,
+ const boost::optional<int64_t>& cas_config_index,
+ tserver::TabletServerErrorPB::Code* error_code) {
+ ChangeConfigRequestPB req;
+ req.set_dest_uuid(leader->uuid());
+ req.set_tablet_id(tablet_id);
+ req.set_type(consensus::CHANGE_REPLICA_TYPE);
+ if (cas_config_index) {
+ req.set_cas_config_opid_index(*cas_config_index);
+ }
+ RaftPeerPB* peer = req.mutable_server();
+ peer->set_permanent_uuid(target_replica->uuid());
+ peer->set_member_type(replica_type);
+
+ ChangeConfigResponsePB resp;
+ RpcController rpc;
+ rpc.set_timeout(timeout);
+ RETURN_NOT_OK(leader->consensus_proxy->ChangeConfig(req, &resp, &rpc));
+ if (resp.has_error()) {
+ if (error_code) {
+ *error_code = resp.error().code();
+ }
return StatusFromPB(resp.error().status());
}
return Status::OK();
http://git-wip-us.apache.org/repos/asf/kudu/blob/28a67136/src/kudu/integration-tests/cluster_itest_util.h
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/cluster_itest_util.h b/src/kudu/integration-tests/cluster_itest_util.h
index 63c28c7..9252dc9 100644
--- a/src/kudu/integration-tests/cluster_itest_util.h
+++ b/src/kudu/integration-tests/cluster_itest_util.h
@@ -288,6 +288,16 @@ Status RemoveServer(const TServerDetails* leader,
const boost::optional<int64_t>& cas_config_index = boost::none,
tserver::TabletServerErrorPB::Code* error_code = nullptr);
+// Change type of the given replica to the specified type.
+// The RPC request is sent to 'leader'.
+Status ChangeReplicaType(const TServerDetails* leader,
+ const std::string& tablet_id,
+ const TServerDetails* target_replica,
+ consensus::RaftPeerPB::MemberType replica_type,
+ const MonoDelta& timeout,
+ const boost::optional<int64_t>& cas_config_index = boost::none,
+ tserver::TabletServerErrorPB::Code* error_code = nullptr);
+
// Get the list of tablets from the remote server.
Status ListTablets(const TServerDetails* ts,
const MonoDelta& timeout,
http://git-wip-us.apache.org/repos/asf/kudu/blob/28a67136/src/kudu/integration-tests/raft_consensus_nonvoter-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/raft_consensus_nonvoter-itest.cc b/src/kudu/integration-tests/raft_consensus_nonvoter-itest.cc
index b4287d7..6b68914 100644
--- a/src/kudu/integration-tests/raft_consensus_nonvoter-itest.cc
+++ b/src/kudu/integration-tests/raft_consensus_nonvoter-itest.cc
@@ -16,12 +16,14 @@
// under the License.
#include <cstdint>
+#include <ostream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include <gflags/gflags_declare.h>
+#include <glog/logging.h>
#include <gtest/gtest.h>
#include "kudu/consensus/consensus.pb.h"
@@ -97,6 +99,14 @@ class RaftConsensusNonVoterITest : public RaftConsensusITestBase {
Status RemoveReplica(const string& tablet_id,
const TServerDetails* replica,
const MonoDelta& timeout);
+
+ // Change replica membership to the specified type, i.e. promote a replica
+ // in case of RaftPeerPB::VOTER member_type, and demote a replica in case of
+ // RaftPeerPB::NON_VOTER member_type.
+ Status ChangeReplicaMembership(RaftPeerPB::MemberType member_type,
+ const string& tablet_id,
+ const TServerDetails* replica,
+ const MonoDelta& timeout);
};
Status RaftConsensusNonVoterITest::GetTabletCopySourceSessionsCount(
@@ -142,6 +152,22 @@ Status RaftConsensusNonVoterITest::RemoveReplica(const string& tablet_id,
return RemoveServer(leader, tablet_id, replica, timeout);
}
+Status RaftConsensusNonVoterITest::ChangeReplicaMembership(
+ RaftPeerPB::MemberType member_type,
+ const string& tablet_id,
+ const TServerDetails* replica,
+ const MonoDelta& timeout) {
+ TServerDetails* leader = nullptr;
+ RETURN_NOT_OK(GetLeaderReplicaWithRetries(tablet_id, &leader));
+
+ // Wait for at least one operation committed by the leader in current term.
+ // Otherwise, any Raft configuration change attempt might end up with error
+ // 'Illegal state: Leader has not yet committed an operation in its own term'.
+ RETURN_NOT_OK(WaitForOpFromCurrentTerm(leader, tablet_id,
+ consensus::COMMITTED_OPID, timeout));
+ return ChangeReplicaType(leader, tablet_id, replica, member_type, timeout);
+}
+
// Ensure that adding a NON_VOTER replica is properly handled by the system:
//
// * Updating Raft configuration for tablet by adding a NON_VOTER replica
@@ -378,7 +404,7 @@ TEST_F(RaftConsensusNonVoterITest, AddThenRemoveNonVoterReplica) {
new_replica_idx, tablet_id, { TABLET_DATA_TOMBSTONED }, kTimeout));
// The added and then removed tablet replica should be gone, and the master
- // should report approrpiate replica count at this point. The tablet leader
+ // should report appropriate replica count at this point. The tablet leader
// should be established.
bool has_leader;
master::TabletLocationsPB tablet_locations;
@@ -409,6 +435,11 @@ TEST_F(RaftConsensusNonVoterITest, AddThenRemoveNonVoterReplica) {
// * does not start leader elections
// * returns an error on RunLeaderElection() RPC call
TEST_F(RaftConsensusNonVoterITest, NonVoterReplicasDoNotVote) {
+ if (!AllowSlowTests()) {
+ LOG(WARNING) << "test is skipped; set KUDU_ALLOW_SLOW_TESTS=1 to run";
+ return;
+ }
+
const MonoDelta kTimeout = MonoDelta::FromSeconds(60);
const int kOriginalReplicasNum = 2;
const int kHbIntervalMs = 64;
@@ -529,5 +560,197 @@ TEST_F(RaftConsensusNonVoterITest, NonVoterReplicasDoNotVote) {
}
}
+// Test that it's possible to promote a NON_VOTER replica into a VOTER one
+// and demote a VOTER replica into NON_VOTER (if not a leader). The new VOTER
+// replica should be able to participate in elections, and the quorum should
+// change accordingly. Promote and demote a replica under active workload.
+// Promote a replica and remove it, making sure it gets tombstoned.
+TEST_F(RaftConsensusNonVoterITest, PromoteAndDemote) {
+ if (!AllowSlowTests()) {
+ LOG(WARNING) << "test is skipped; set KUDU_ALLOW_SLOW_TESTS=1 to run";
+ return;
+ }
+
+ const MonoDelta kTimeout = MonoDelta::FromSeconds(60);
+ const int kInitialReplicasNum = 3;
+ const int kHbIntervalMs = 50;
+ const vector<string> kTserverFlags = {
+ Substitute("--raft_heartbeat_interval_ms=$0", kHbIntervalMs),
+ };
+
+ FLAGS_num_tablet_servers = 4;
+ FLAGS_num_replicas = kInitialReplicasNum;
+ NO_FATALS(BuildAndStart(kTserverFlags));
+ ASSERT_EQ(4, tablet_servers_.size());
+ ASSERT_EQ(kInitialReplicasNum, tablet_replicas_.size());
+
+ const string& tablet_id = tablet_id_;
+ TabletServerMap replica_servers;
+ for (const auto& e : tablet_replicas_) {
+ if (e.first == tablet_id) {
+ replica_servers.emplace(e.second->uuid(), e.second);
+ }
+ }
+ ASSERT_EQ(FLAGS_num_replicas, replica_servers.size());
+
+ ASSERT_OK(WaitForServersToAgree(kTimeout, replica_servers, tablet_id, 1));
+
+ TServerDetails* new_replica = nullptr;
+ for (const auto& ts : tablet_servers_) {
+ if (replica_servers.find(ts.first) == replica_servers.end()) {
+ new_replica = ts.second;
+ break;
+ }
+ }
+ ASSERT_NE(nullptr, new_replica);
+
+ ASSERT_OK(AddReplica(tablet_id, new_replica, RaftPeerPB::NON_VOTER, kTimeout));
+
+ {
+ // It should not be possible to demote the tablet leader.
+ TServerDetails* leader;
+ ASSERT_OK(GetLeaderReplicaWithRetries(tablet_id, &leader));
+ const Status s_demote = ChangeReplicaMembership(RaftPeerPB::NON_VOTER,
+ tablet_id, leader, kTimeout);
+ const auto s_demote_str = s_demote.ToString();
+ ASSERT_TRUE(s_demote.IsInvalidArgument()) << s_demote_str;
+ ASSERT_STR_MATCHES(s_demote_str,
+ "Cannot change the replica type of peer .* because it is the leader");
+
+ // It should not be possible to promote a leader replica since it's
+ // already a voter.
+ const Status s_promote = ChangeReplicaMembership(RaftPeerPB::VOTER,
+ tablet_id, leader, kTimeout);
+ const auto s_promote_str = s_promote.ToString();
+ ASSERT_TRUE(s_promote.IsInvalidArgument()) << s_promote_str;
+ ASSERT_STR_MATCHES(s_promote_str,
+ "Cannot change the replica type of peer .* because it is the leader");
+ }
+
+ {
+ // It should not be possible to demote a non-voter.
+ Status s = ChangeReplicaMembership(RaftPeerPB::NON_VOTER,
+ tablet_id, new_replica, kTimeout);
+ const auto s_str = s.ToString();
+ ASSERT_TRUE(s.IsInvalidArgument()) << s_str;
+ ASSERT_STR_CONTAINS(s_str, "Cannot change replica type to same type");
+ }
+
+ // It should be possible to promote a non-voter to voter.
+ ASSERT_OK(ChangeReplicaMembership(RaftPeerPB::VOTER, tablet_id,
+ new_replica, kTimeout));
+
+ {
+ // It should not be possible to promote a voter since it's a voter already.
+ Status s = ChangeReplicaMembership(RaftPeerPB::VOTER,
+ tablet_id, new_replica, kTimeout);
+ const auto s_str = s.ToString();
+ ASSERT_TRUE(s.IsInvalidArgument()) << s_str;
+ ASSERT_STR_CONTAINS(s_str, "Cannot change replica type to same type");
+ }
+
+ // Make sure the promoted replica is in the quorum.
+ {
+ // Pause both the current leader and the new voter, and make sure
+ // no new leader can be elected since the majority is now 3 out of 4 voters.
+ TServerDetails* leader;
+ ASSERT_OK(GetLeaderReplicaWithRetries(tablet_id, &leader));
+ ExternalTabletServer* leader_ts =
+ cluster_->tablet_server_by_uuid(leader->uuid());
+ ASSERT_OK(leader_ts->Pause());
+ auto cleanup_leader = MakeScopedCleanup([&]() {
+ ASSERT_OK(leader_ts->Resume());
+ });
+
+ ExternalTabletServer* new_replica_ts =
+ cluster_->tablet_server_by_uuid(new_replica->uuid());
+ ASSERT_OK(new_replica_ts->Pause());
+ auto cleanup_new_replica = MakeScopedCleanup([&]() {
+ ASSERT_OK(new_replica_ts->Resume());
+ });
+
+ {
+ TServerDetails* leader;
+ Status s = GetLeaderReplicaWithRetries(tablet_id, &leader, 10);
+ const auto s_str = s.ToString();
+ ASSERT_TRUE(s.IsNotFound()) << s_str;
+ ASSERT_STR_CONTAINS(s_str, "leader replica not found");
+ }
+
+ // Resume the newly promoted replica: there should be 3 out of 4 voters
+ // available now, so they should be able to elect a leader among them.
+ ASSERT_OK(new_replica_ts->Resume());
+ ASSERT_OK(GetLeaderReplicaWithRetries(tablet_id, &leader));
+
+ // It should be possible to demote a replica back: 2 out of 3 voters
+ // of the result configuration are available, so 2 active voters out of 3
+ // (1 is still paused) and 1 non-votes in the tablet constitute a functional
+ // tablet, so they should be able to elect a leader replica among them.
+ ASSERT_OK(ChangeReplicaMembership(RaftPeerPB::NON_VOTER,
+ tablet_id, new_replica, kTimeout));
+ ASSERT_OK(GetLeaderReplicaWithRetries(tablet_id, &leader));
+ }
+
+ // Promote/demote a replica under active workload.
+ {
+ TestWorkload workload(cluster_.get());
+ workload.set_table_name(kTableId);
+ workload.Setup();
+ workload.Start();
+ while (workload.rows_inserted() < 10) {
+ SleepFor(MonoDelta::FromMilliseconds(10));
+ }
+
+ ASSERT_OK(ChangeReplicaMembership(RaftPeerPB::VOTER,
+ tablet_id, new_replica, kTimeout));
+ auto rows_inserted = workload.rows_inserted();
+ while (workload.rows_inserted() < rows_inserted * 2) {
+ SleepFor(MonoDelta::FromMilliseconds(10));
+ }
+
+ ASSERT_OK(ChangeReplicaMembership(RaftPeerPB::NON_VOTER,
+ tablet_id, new_replica, kTimeout));
+ rows_inserted = workload.rows_inserted();
+ while (workload.rows_inserted() < rows_inserted * 2) {
+ SleepFor(MonoDelta::FromMilliseconds(10));
+ }
+ workload.StopAndJoin();
+
+ ASSERT_OK(WaitForServersToAgree(kTimeout, replica_servers, tablet_id, 1));
+
+ ClusterVerifier v(cluster_.get());
+ NO_FATALS(v.CheckCluster());
+ NO_FATALS(v.CheckRowCount(workload.table_name(),
+ ClusterVerifier::EXACTLY,
+ workload.rows_inserted()));
+ }
+
+ // Promote the non-voter replica again and then remove it when it's a voter,
+ // making sure it gets tombstoned.
+ {
+ ASSERT_OK(ChangeReplicaMembership(RaftPeerPB::VOTER,
+ tablet_id, new_replica, kTimeout));
+ ASSERT_OK(WaitForServersToAgree(kTimeout, replica_servers, tablet_id, 1));
+ ASSERT_OK(RemoveReplica(tablet_id, new_replica, kTimeout));
+
+ // Verify the removed replica gets tombstoned.
+ const int new_replica_idx =
+ cluster_->tablet_server_index_by_uuid(new_replica->uuid());
+ ASSERT_OK(inspect_->WaitForTabletDataStateOnTS(
+ new_replica_idx, tablet_id, { TABLET_DATA_TOMBSTONED }, kTimeout));
+
+ // The removed tablet replica should be gone, and the master should report
+ // appropriate replica count at this point.
+ bool has_leader;
+ master::TabletLocationsPB tablet_locations;
+ ASSERT_OK(WaitForReplicasReportedToMaster(
+ cluster_->master_proxy(), kInitialReplicasNum, tablet_id, kTimeout,
+ WAIT_FOR_LEADER, &has_leader, &tablet_locations));
+ ASSERT_TRUE(has_leader);
+ }
+
+ NO_FATALS(cluster_->AssertNoCrashes());
+}
+
} // namespace tserver
} // namespace kudu