You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2020/04/14 05:47:56 UTC

[kudu] 01/03: [catalog manager] cache masters' addresses

This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch branch-1.12.x
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 069c00c3a490e83c26d076a9f3ac3a9c7a87631b
Author: Alexey Serbin <al...@apache.org>
AuthorDate: Thu Apr 9 15:03:25 2020 -0700

    [catalog manager] cache masters' addresses
    
    While troubleshooting one performance issue if running a big cluster
    with large number of tables and high rate of ConnectToMaster requests,
    in the logs I noticed many reports like the following:
    
      0323 03:59:31.091574 (+114857us) spinlock_profiling.cc:243]
      Waited 114 ms on lock 0x4d0ee8c. stack:
        0000000002398852
        00000000020d45b3
        0000000000a8f8fc
        0000000000aa6300
        000000000221aaa8
        ...
    
    which translates into
    
        (anonymous namespace)::SubmitSpinLockProfileData()
        consensus::RaftConsensus::CommittedConfig()
        master::Master::GetMasterHostPorts()
        master::MasterServiceImpl::ConnectToMaster()
        rpc::GeneratedServiceIf::Handle()
        ...
    
    From the code it became apparent that the lock in question was
      LockGuard l(lock_);
    in RaftConsensus::CommittedConfig() accessor method.
    
    This patch introduces caching of the master host ports, so there is
    no need to fetch the information on the master addresses from the
    consensus metadata every time ConnectToMaster() is called.  Since the
    membership for master Raft consensus is static, it's enough to fetch
    the information on the masters' addresses upon catalog initialization
    and use that information since then.  If we implement dynamic master
    change config, we'll need to refresh the cached state on change config.
    
    This is a follow-up to 14912a1fd78ba7cf4d62bf934ae64d6f6f229ee6.
    
    Change-Id: Ic9afea2d708bd3060b3d9f5b672660e1d2dca910
    Reviewed-on: http://gerrit.cloudera.org:8080/15704
    Reviewed-by: Adar Dembo <ad...@cloudera.com>
    Tested-by: Kudu Jenkins
    Reviewed-by: Bankim Bhavsar <ba...@cloudera.com>
    Reviewed-by: Andrew Wong <aw...@cloudera.com>
    (cherry picked from commit b2e4d952b6134a47db383fa2a0a88ac47cbaecc1)
    Reviewed-on: http://gerrit.cloudera.org:8080/15725
    Reviewed-by: Hao Hao <ha...@cloudera.com>
---
 src/kudu/master/catalog_manager.cc |  8 +++-----
 src/kudu/master/catalog_manager.h  | 10 ++++++++++
 src/kudu/master/master.cc          | 10 +++++-----
 src/kudu/master/master.h           |  4 ++--
 src/kudu/master/master_service.cc  | 12 ++++--------
 5 files changed, 24 insertions(+), 20 deletions(-)

diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc
index af1b5f1..7d79906 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -825,13 +825,11 @@ Status CatalogManager::Init(bool is_first_run) {
     auto_rebalancer_ = std::move(task);
   }
 
+  RETURN_NOT_OK(master_->GetMasterHostPorts(&master_addresses_));
   if (hms::HmsCatalog::IsEnabled()) {
-    vector<HostPortPB> master_addrs_pb;
-    RETURN_NOT_OK(master_->GetMasterHostPorts(&master_addrs_pb));
-
     string master_addresses = JoinMapped(
-      master_addrs_pb,
-      [] (const HostPortPB& hostport) {
+      master_addresses_,
+      [] (const HostPort& hostport) {
         return Substitute("$0:$1", hostport.host(), hostport.port());
       },
       ",");
diff --git a/src/kudu/master/catalog_manager.h b/src/kudu/master/catalog_manager.h
index d6d5ff6..6650a76 100644
--- a/src/kudu/master/catalog_manager.h
+++ b/src/kudu/master/catalog_manager.h
@@ -48,6 +48,7 @@
 #include "kudu/util/cow_object.h"
 #include "kudu/util/locks.h"
 #include "kudu/util/monotime.h"
+#include "kudu/util/net/net_util.h"
 #include "kudu/util/oid_generator.h"
 #include "kudu/util/random.h"
 #include "kudu/util/rw_mutex.h"
@@ -783,6 +784,10 @@ class CatalogManager : public tserver::TabletReplicaLookupIf {
   // name is returned.
   static std::string NormalizeTableName(const std::string& table_name);
 
+  const std::vector<HostPort>& master_addresses() const {
+    return master_addresses_;
+  }
+
  private:
   // These tests call ElectedAsLeaderCb() directly.
   FRIEND_TEST(MasterTest, TestShutdownDuringTableVisit);
@@ -1158,6 +1163,11 @@ class CatalogManager : public tserver::TabletReplicaLookupIf {
   // Always acquire this lock before state_lock_.
   RWMutex leader_lock_;
 
+  // Cached information on master addresses. It's populated in Init() since the
+  // membership of masters' Raft consensus is static (i.e. no new members are
+  // added or any existing removed).
+  std::vector<HostPort> master_addresses_;
+
   // Async operations are accessing some private methods
   // (TODO: this stuff should be deferred and done in the background thread)
   friend class AsyncAlterTable;
diff --git a/src/kudu/master/master.cc b/src/kudu/master/master.cc
index f55cc6a..34ae5e6 100644
--- a/src/kudu/master/master.cc
+++ b/src/kudu/master/master.cc
@@ -28,7 +28,6 @@
 #include <glog/logging.h>
 
 #include "kudu/cfile/block_cache.h"
-#include "kudu/common/common.pb.h"
 #include "kudu/common/wire_protocol.h"
 #include "kudu/common/wire_protocol.pb.h"
 #include "kudu/consensus/metadata.pb.h"
@@ -369,7 +368,7 @@ Status Master::ListMasters(vector<ServerEntryPB>* masters) const {
   return Status::OK();
 }
 
-Status Master::GetMasterHostPorts(vector<HostPortPB>* hostports) const {
+Status Master::GetMasterHostPorts(vector<HostPort>* hostports) const {
   auto consensus = catalog_manager_->master_consensus();
   if (!consensus) {
     return Status::IllegalState("consensus not running");
@@ -377,7 +376,7 @@ Status Master::GetMasterHostPorts(vector<HostPortPB>* hostports) const {
 
   hostports->clear();
   consensus::RaftConfigPB config = consensus->CommittedConfig();
-  for (auto& peer : *config.mutable_peers()) {
+  for (const auto& peer : *config.mutable_peers()) {
     if (peer.member_type() == consensus::RaftPeerPB::VOTER) {
       // In non-distributed master configurations, we don't store our own
       // last known address in the Raft config. So, we'll fill it in from
@@ -385,9 +384,10 @@ Status Master::GetMasterHostPorts(vector<HostPortPB>* hostports) const {
       if (!peer.has_last_known_addr()) {
         DCHECK_EQ(config.peers_size(), 1);
         DCHECK(registration_initialized_.load());
-        hostports->emplace_back(registration_.rpc_addresses(0));
+        DCHECK_GT(registration_.rpc_addresses_size(), 0);
+        hostports->emplace_back(HostPortFromPB(registration_.rpc_addresses(0)));
       } else {
-        hostports->emplace_back(std::move(*peer.mutable_last_known_addr()));
+        hostports->emplace_back(HostPortFromPB(peer.last_known_addr()));
       }
     }
   }
diff --git a/src/kudu/master/master.h b/src/kudu/master/master.h
index 3f77074..28670fe 100644
--- a/src/kudu/master/master.h
+++ b/src/kudu/master/master.h
@@ -32,7 +32,7 @@
 
 namespace kudu {
 
-class HostPortPB;
+class HostPort;
 class MaintenanceManager;
 class MonoDelta;
 class ThreadPool;
@@ -105,7 +105,7 @@ class Master : public kserver::KuduServer {
   // Gets the HostPorts for all of the masters in the cluster.
   // This is not as complete as ListMasters() above, but operates just
   // based on local state.
-  Status GetMasterHostPorts(std::vector<HostPortPB>* hostports) const;
+  Status GetMasterHostPorts(std::vector<HostPort>* hostports) const;
 
   // Crash the master on disk error.
   static void CrashMasterOnDiskError(const std::string& uuid);
diff --git a/src/kudu/master/master_service.cc b/src/kudu/master/master_service.cc
index 50ed51d..b67cd02 100644
--- a/src/kudu/master/master_service.cc
+++ b/src/kudu/master/master_service.cc
@@ -629,14 +629,10 @@ void MasterServiceImpl::ConnectToMaster(const ConnectToMasterRequestPB* /*req*/,
 
   // Set the info about the other masters, so that the client can verify
   // it has the full set of info.
-  {
-    vector<HostPortPB> hostports;
-    WARN_NOT_OK(server_->GetMasterHostPorts(&hostports),
-                "unable to get HostPorts for masters");
-    resp->mutable_master_addrs()->Reserve(hostports.size());
-    for (auto& hp : hostports) {
-      *resp->add_master_addrs() = std::move(hp);
-    }
+  const auto& addresses = server_->catalog_manager()->master_addresses();
+  resp->mutable_master_addrs()->Reserve(addresses.size());
+  for (const auto& hp : addresses) {
+    *resp->add_master_addrs() = HostPortToPB(hp);
   }
 
   const bool is_leader = l.leader_status().ok();