You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2018/10/30 00:25:46 UTC
[2/2] kudu git commit: [rebalancer] location-aware rebalancer (part
6/n)
[rebalancer] location-aware rebalancer (part 6/n)
Added SetReplace() and CheckCompleteReplace() auxiliary functions.
A follow-up patch will start using those.
Change-Id: I80b560d70c4d7383ee89917a359b4bb2f41bfd31
Reviewed-on: http://gerrit.cloudera.org:8080/11747
Tested-by: Alexey Serbin <as...@cloudera.com>
Reviewed-by: Will Berkeley <wd...@gmail.com>
Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/f731ea00
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/f731ea00
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/f731ea00
Branch: refs/heads/master
Commit: f731ea004590217fe21b133ed093a7e9d21e7d42
Parents: 87084c1
Author: Alexey Serbin <as...@cloudera.com>
Authored: Fri Oct 19 23:21:26 2018 -0700
Committer: Alexey Serbin <as...@cloudera.com>
Committed: Mon Oct 29 23:27:01 2018 +0000
----------------------------------------------------------------------
src/kudu/tools/tool_replica_util.cc | 134 +++++++++++++++++++++++++++++++
src/kudu/tools/tool_replica_util.h | 23 ++++++
2 files changed, 157 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kudu/blob/f731ea00/src/kudu/tools/tool_replica_util.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/tool_replica_util.cc b/src/kudu/tools/tool_replica_util.cc
index ccec47e..45bac17 100644
--- a/src/kudu/tools/tool_replica_util.cc
+++ b/src/kudu/tools/tool_replica_util.cc
@@ -377,6 +377,140 @@ Status CheckCompleteMove(const vector<string>& master_addresses,
return Status::OK();
}
+Status SetReplace(const client::sp::shared_ptr<client::KuduClient>& client,
+ const string& tablet_id,
+ const string& ts_uuid,
+ const boost::optional<int64_t>& cas_opid_idx,
+ bool* cas_failed) {
+ // Reset the optional 'cas_failed' output parameter to 'false' up front so it
+ // holds a defined value on any early return below.
+ if (cas_failed) {
+ *cas_failed = false;
+ }
+ // Find this tablet's leader replica. We need its UUID and RPC address.
+ string leader_uuid;
+ HostPort leader_hp;
+ RETURN_NOT_OK(GetTabletLeader(client, tablet_id, &leader_uuid, &leader_hp));
+ unique_ptr<ConsensusServiceProxy> proxy;
+ RETURN_NOT_OK(BuildProxy(leader_hp.host(), leader_hp.port(), &proxy));
+
+ // Get the consensus state and the replica management scheme in use: the
+ // REPLACE attribute is set only when the 3-4-3 scheme is in effect.
+ bool is_343_scheme;
+ ConsensusStatePB cstate;
+ RETURN_NOT_OK(GetConsensusState(proxy, tablet_id, leader_uuid,
+ client->default_admin_operation_timeout(),
+ &cstate, &is_343_scheme));
+ // The 3-2-3 replica management scheme (pre-KUDU-1097) does not process
+ // the REPLACE attribute as expected, so bail out with an error.
+ if (!is_343_scheme) {
+ return Status::ConfigurationError(
+ "cluster is running in 3-2-3 management scheme");
+ }
+
+ // Check whether 'ts_uuid' already has REPLACE set in the committed config.
+ for (const auto& peer : cstate.committed_config().peers()) {
+ if (peer.permanent_uuid() == ts_uuid && peer.attrs().replace()) {
+ // The replica is already marked with the REPLACE attribute: nothing to do.
+ return Status::OK();
+ }
+ }
+
+ BulkChangeConfigRequestPB req;
+ auto* change = req.add_config_changes();
+ change->set_type(MODIFY_PEER);
+ *change->mutable_peer()->mutable_permanent_uuid() = ts_uuid;
+ change->mutable_peer()->mutable_attrs()->set_replace(true);
+ consensus::ChangeConfigResponsePB resp;
+ RpcController rpc;
+ rpc.set_timeout(client->default_admin_operation_timeout());
+ req.set_dest_uuid(leader_uuid);
+ req.set_tablet_id(tablet_id);
+ if (cas_opid_idx) {
+ req.set_cas_config_opid_index(*cas_opid_idx);
+ }
+ RETURN_NOT_OK(proxy->BulkChangeConfig(req, &resp, &rpc));
+ if (resp.has_error()) {
+ if (resp.error().code() == tserver::TabletServerErrorPB::CAS_FAILED &&
+ cas_failed) {
+ *cas_failed = true;
+ }
+ return StatusFromPB(resp.error().status());
+ }
+ return Status::OK();
+}
+
+Status CheckCompleteReplace(const client::sp::shared_ptr<client::KuduClient>& client,
+ const string& tablet_id,
+ const string& ts_uuid,
+ bool* is_complete,
+ Status* completion_status) {
+ DCHECK(completion_status);
+ DCHECK(is_complete);
+ *is_complete = false;
+ // Get the latest leader info. It may change later, due to our actions or
+ // outside factors.
+ string leader_uuid;
+ HostPort leader_hp;
+ RETURN_NOT_OK(GetTabletLeader(client, tablet_id, &leader_uuid, &leader_hp));
+ unique_ptr<ConsensusServiceProxy> proxy;
+ RETURN_NOT_OK(BuildProxy(leader_hp.host(), leader_hp.port(), &proxy));
+
+ ConsensusStatePB cstate;
+ bool is_343_scheme;
+ RETURN_NOT_OK(GetConsensusState(proxy, tablet_id, leader_uuid,
+ client->default_admin_operation_timeout(),
+ &cstate, &is_343_scheme));
+ if (!is_343_scheme) {
+ return Status::ConfigurationError(
+ "cluster is not running in 3-4-3 replica management scheme");
+ }
+
+ bool is_all_voters = true;
+ for (const auto& peer : cstate.committed_config().peers()) {
+ if (peer.member_type() != RaftPeerPB::VOTER) {
+ is_all_voters = false;
+ break;
+ }
+ }
+
+ // Check if the replica slated for removal is still in the committed config.
+ bool ts_uuid_in_config = false;
+ for (const auto& peer : cstate.committed_config().peers()) {
+ if (peer.permanent_uuid() == ts_uuid) {
+ ts_uuid_in_config = true;
+ if (!peer.attrs().replace()) {
+ // Sanity check: the replica must have the REPLACE attribute set.
+ // Otherwise, something has changed in the middle and the replica will
+ // never be evicted, so it does not make sense to await its removal.
+ *is_complete = true;
+ *completion_status = Status::IllegalState(Substitute(
+ "$0: replica $1 does not have the REPLACE attribute set",
+ tablet_id, ts_uuid));
+ }
+ // Demoting the current leader makes little sense while a newly added
+ // non-voter hasn't been promoted to voter yet: the former leader replica
+ // will not be evicted before that promotion, and stepping down too early
+ // might even delay promotion of an already caught-up non-leader replica.
+ // NOTE(review): this also runs when REPLACE is not set above -- intended?
+ if (is_all_voters &&
+ leader_uuid == ts_uuid && leader_uuid == cstate.leader_uuid()) {
+ // The leader is the node we intend to remove; ask it to step down
+ // (best effort: the result of the call is deliberately ignored).
+ ignore_result(DoLeaderStepDown(tablet_id, leader_uuid, leader_hp,
+ LeaderStepDownMode::GRACEFUL, boost::none,
+ client->default_admin_operation_timeout()));
+ }
+ break;
+ }
+ }
+
+ if (!ts_uuid_in_config) {
+ *is_complete = true;
+ *completion_status = Status::OK();
+ }
+ return Status::OK();
+}
+
Status ScheduleReplicaMove(const vector<string>& master_addresses,
const client::sp::shared_ptr<client::KuduClient>& client,
http://git-wip-us.apache.org/repos/asf/kudu/blob/f731ea00/src/kudu/tools/tool_replica_util.h
----------------------------------------------------------------------
diff --git a/src/kudu/tools/tool_replica_util.h b/src/kudu/tools/tool_replica_util.h
index 07b47c0..6c13e01 100644
--- a/src/kudu/tools/tool_replica_util.h
+++ b/src/kudu/tools/tool_replica_util.h
@@ -117,6 +117,29 @@ Status CheckCompleteMove(
bool* is_complete,
Status* completion_status);
+// Set the REPLACE attribute for the 'ts_uuid' replica of tablet 'tablet_id'.
+// No-op if already set. If non-null, 'cas_failed' reports a CAS failure.
+Status SetReplace(const client::sp::shared_ptr<client::KuduClient>& client,
+ const std::string& tablet_id,
+ const std::string& ts_uuid,
+ const boost::optional<int64_t>& cas_opid_idx,
+ bool* cas_failed = nullptr);
+
+// Check whether the replica of the tablet 'tablet_id' hosted by the tserver
+// identified by 'ts_uuid' is gone from the tablet's committed Raft config.
+// A non-OK status is returned if there was a problem checking the config.
+// Otherwise Status::OK() is returned: 'is_complete' is set to 'true' once the
+// replica is no longer in the config ('completion_status' is OK) or if the
+// replica is found without the REPLACE attribute ('completion_status' is
+// IllegalState); it stays 'false' while a REPLACE-marked replica remains.
+// The 'completion_status' parameter contains valid information only if
+// 'is_complete' is set to 'true'.
+Status CheckCompleteReplace(const client::sp::shared_ptr<client::KuduClient>& client,
+ const std::string& tablet_id,
+ const std::string& ts_uuid,
+ bool* is_complete,
+ Status* completion_status);
+
// Schedule replica move operation for tablet with 'tablet_id', moving replica
// from the tablet server 'from_ts_uuid' to tablet server 'to_ts_uuid'.
Status ScheduleReplicaMove(