You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2018/09/12 06:44:22 UTC

kudu git commit: [catalog_manager] optimization in AsyncAddReplicaTask

Repository: kudu
Updated Branches:
  refs/heads/master 97dbffb95 -> eb109116b


[catalog_manager] optimization in AsyncAddReplicaTask

Introduced a minor optimization on AsyncAddReplicaTask::SendRequest()
while populating the set of blacklisted candidates for an extra replica.

With this patch, the code iterates over the UUIDs of the peers in the
committed config to populate the set of the blacklisted candidates,
rather than filtering all tablet servers using the IsRaftConfigMember()
utility function.

Change-Id: I1f46ef2e3e3f385f0abc7e02e521e03d47569413
Reviewed-on: http://gerrit.cloudera.org:8080/11429
Reviewed-by: Adar Dembo <ad...@cloudera.com>
Tested-by: Kudu Jenkins


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/eb109116
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/eb109116
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/eb109116

Branch: refs/heads/master
Commit: eb109116bc6e1c89489b503a36951b53dfabd933
Parents: 97dbffb
Author: Alexey Serbin <as...@cloudera.com>
Authored: Tue Sep 11 21:33:57 2018 -0700
Committer: Alexey Serbin <as...@cloudera.com>
Committed: Wed Sep 12 06:43:51 2018 +0000

----------------------------------------------------------------------
 src/kudu/master/catalog_manager.cc | 33 +++++++++++++++++++++------------
 1 file changed, 21 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/eb109116/src/kudu/master/catalog_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc
index 6aa2186..922ee3e 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -3465,17 +3465,28 @@ bool AsyncAddReplicaTask::SendRequest(int attempt) {
 
   // Select the replica we wish to add to the config.
   // Do not include current members of the config.
-  TSDescriptorVector ts_descs;
-  master_->ts_manager()->GetAllLiveDescriptors(&ts_descs);
+  const auto& config = cstate_.committed_config();
   set<shared_ptr<TSDescriptor>> excluded;
-  for (const auto& ts_desc : ts_descs) {
-    if (IsRaftConfigMember(ts_desc->permanent_uuid(), cstate_.committed_config())) {
-      InsertOrDie(&excluded, ts_desc);
+  for (auto i = 0; i < config.peers_size(); ++i) {
+    shared_ptr<TSDescriptor> desc;
+    if (master_->ts_manager()->LookupTSByUUID(config.peers(i).permanent_uuid(),
+                                              &desc)) {
+      EmplaceOrDie(&excluded, std::move(desc));
     }
   }
 
-  auto replacement_replica = SelectReplica(ts_descs, excluded, rng_);
-  if (PREDICT_FALSE(!replacement_replica)) {
+  TSDescriptorVector ts_descs;
+  master_->ts_manager()->GetAllLiveDescriptors(&ts_descs);
+
+  // Some of the tablet servers hosting the current members of the config
+  // (see the 'excluded' populated above) might be presumably dead.
+  // Inclusion of a presumably dead tablet server into 'excluded' is OK:
+  // SelectReplica() does not require elements of 'excluded' to be a subset of
+  // 'ts_descs', and 'ts_descs' contains only alive tablet servers. Essentially,
+  // the list of candidate tablet servers to host the extra replica
+  // is 'ts_descs' after blacklisting all elements common with 'excluded'.
+  auto extra_replica = SelectReplica(ts_descs, excluded, rng_);
+  if (PREDICT_FALSE(!extra_replica)) {
     auto msg = Substitute("no extra replica candidate found for tablet $0",
                           tablet_->ToString());
     // Check whether it's a situation when an extra replica cannot be found
@@ -3484,9 +3495,7 @@ bool AsyncAddReplicaTask::SendRequest(int attempt) {
     // replica management scheme (see --raft_prepare_replacement_before_eviction
     // flag), at least N+1 tablet servers should be registered to find a place
     // for an extra replica.
-    TSDescriptorVector all_descriptors;
-    master_->ts_manager()->GetAllDescriptors(&all_descriptors);
-    const auto num_tservers_registered = all_descriptors.size();
+    const auto num_tservers_registered = master_->ts_manager()->GetCount();
 
     auto replication_factor = 0;
     {
@@ -3514,13 +3523,13 @@ bool AsyncAddReplicaTask::SendRequest(int attempt) {
   req.set_type(consensus::ADD_PEER);
   req.set_cas_config_opid_index(cstate_.committed_config().opid_index());
   RaftPeerPB* peer = req.mutable_server();
-  peer->set_permanent_uuid(replacement_replica->permanent_uuid());
+  peer->set_permanent_uuid(extra_replica->permanent_uuid());
   if (FLAGS_raft_prepare_replacement_before_eviction &&
       member_type_ == RaftPeerPB::NON_VOTER) {
     peer->mutable_attrs()->set_promote(true);
   }
   ServerRegistrationPB peer_reg;
-  replacement_replica->GetRegistration(&peer_reg);
+  extra_replica->GetRegistration(&peer_reg);
   CHECK_GT(peer_reg.rpc_addresses_size(), 0);
   *peer->mutable_last_known_addr() = peer_reg.rpc_addresses(0);
   peer->set_member_type(member_type_);