You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by aw...@apache.org on 2019/04/09 20:39:11 UTC

[kudu] 03/04: [HaClient] introduce client metrics

This is an automated email from the ASF dual-hosted git repository.

awong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 9382d072111117dc7e9410e8eeb85781e9702384
Author: Alexey Serbin <al...@apache.org>
AuthorDate: Fri Apr 5 20:11:56 2019 -0700

    [HaClient] introduce client metrics
    
    Introduced metrics for HA Sentry client in SentryAuthzProvider.
    Added corresponding test to verify how the newly introduced
    metrics work.
    
    Change-Id: I86e12ee6ee34f39c11f18b21e846cc5a1dee6b6f
    Reviewed-on: http://gerrit.cloudera.org:8080/12951
    Tested-by: Kudu Jenkins
    Reviewed-by: Adar Dembo <ad...@cloudera.com>
    Reviewed-by: Andrew Wong <aw...@cloudera.com>
---
 src/kudu/master/CMakeLists.txt                |   1 +
 src/kudu/master/catalog_manager.cc            |  11 ++-
 src/kudu/master/sentry_authz_provider-test.cc | 130 ++++++++++++++++++++++++--
 src/kudu/master/sentry_authz_provider.cc      |  13 +++
 src/kudu/master/sentry_authz_provider.h       |   4 +
 src/kudu/master/sentry_client_metrics.cc      |  58 ++++++++++++
 src/kudu/master/sentry_client_metrics.h       |  33 +++++++
 src/kudu/thrift/client.h                      |  37 +++++++-
 src/kudu/thrift/ha_client_metrics.h           |  41 ++++++++
 9 files changed, 313 insertions(+), 15 deletions(-)

diff --git a/src/kudu/master/CMakeLists.txt b/src/kudu/master/CMakeLists.txt
index 6409499..1ada2e7 100644
--- a/src/kudu/master/CMakeLists.txt
+++ b/src/kudu/master/CMakeLists.txt
@@ -45,6 +45,7 @@ set(MASTER_SRCS
   mini_master.cc
   placement_policy.cc
   sentry_authz_provider.cc
+  sentry_client_metrics.cc
   sys_catalog.cc
   ts_descriptor.cc
   ts_manager.cc)
diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc
index b3480ea..ced919f 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -44,6 +44,7 @@
 #include <algorithm>
 #include <cstdint>
 #include <cstdlib>
+#include <ctime>
 #include <functional>
 #include <iterator>
 #include <map>
@@ -53,7 +54,6 @@
 #include <ostream>
 #include <set>
 #include <string>
-#include <time.h>
 #include <type_traits>
 #include <unordered_map>
 #include <unordered_set>
@@ -132,6 +132,7 @@
 #include "kudu/util/fault_injection.h"
 #include "kudu/util/flag_tags.h"
 #include "kudu/util/logging.h"
+#include "kudu/util/metrics.h"
 #include "kudu/util/monotime.h"
 #include "kudu/util/mutex.h"
 #include "kudu/util/pb_util.h"
@@ -726,7 +727,7 @@ Status CatalogManager::Init(bool is_first_run) {
   RETURN_NOT_OK_PREPEND(sys_catalog_->WaitUntilRunning(),
                         "Failed waiting for the catalog tablet to run");
 
-  authz_provider_.reset(new master::DefaultAuthzProvider());
+  authz_provider_.reset(new DefaultAuthzProvider);
 
   if (hms::HmsCatalog::IsEnabled()) {
     vector<HostPortPB> master_addrs_pb;
@@ -756,7 +757,7 @@ Status CatalogManager::Init(bool is_first_run) {
 
     // Use SentryAuthzProvider when both Sentry and the HMS integration are enabled.
     if (SentryAuthzProvider::IsEnabled()) {
-      authz_provider_.reset(new master::SentryAuthzProvider());
+      authz_provider_.reset(new SentryAuthzProvider(master_->metric_entity()));
     }
   }
 
@@ -4603,7 +4604,7 @@ void CatalogManager::SendCreateTabletRequest(const scoped_refptr<TabletInfo>& ta
 
 Status CatalogManager::BuildLocationsForTablet(
     const scoped_refptr<TabletInfo>& tablet,
-    master::ReplicaTypeFilter filter,
+    ReplicaTypeFilter filter,
     TabletLocationsPB* locs_pb) {
   TabletMetadataLock l_tablet(tablet.get(), LockMode::READ);
   if (PREDICT_FALSE(l_tablet.data().is_deleted())) {
@@ -4674,7 +4675,7 @@ Status CatalogManager::BuildLocationsForTablet(
 }
 
 Status CatalogManager::GetTabletLocations(const string& tablet_id,
-                                          master::ReplicaTypeFilter filter,
+                                          ReplicaTypeFilter filter,
                                           TabletLocationsPB* locs_pb,
                                           optional<const string&> user) {
   leader_lock_.AssertAcquiredForReading();
diff --git a/src/kudu/master/sentry_authz_provider-test.cc b/src/kudu/master/sentry_authz_provider-test.cc
index 04aff81..3266823 100644
--- a/src/kudu/master/sentry_authz_provider-test.cc
+++ b/src/kudu/master/sentry_authz_provider-test.cc
@@ -17,6 +17,7 @@
 
 #include "kudu/master/sentry_authz_provider.h"
 
+#include <cstdint>
 #include <memory>
 #include <ostream>
 #include <string>
@@ -28,6 +29,7 @@
 
 #include "kudu/gutil/macros.h"
 #include "kudu/gutil/map-util.h"
+#include "kudu/gutil/ref_counted.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/master/sentry_authz_provider-test-base.h"
 #include "kudu/sentry/mini_sentry.h"
@@ -36,6 +38,8 @@
 #include "kudu/sentry/sentry_authorizable_scope.h"
 #include "kudu/sentry/sentry_client.h"
 #include "kudu/sentry/sentry_policy_service_types.h"
+#include "kudu/util/hdr_histogram.h"
+#include "kudu/util/metrics.h"
 #include "kudu/util/net/net_util.h"
 #include "kudu/util/random.h"
 #include "kudu/util/random_util.h"
@@ -50,6 +54,18 @@ DECLARE_string(sentry_service_security_mode);
 DECLARE_string(server_name);
 DECLARE_string(trusted_user_acl);
 
+METRIC_DECLARE_counter(sentry_client_tasks_successful);
+METRIC_DECLARE_counter(sentry_client_tasks_failed_fatal);
+METRIC_DECLARE_counter(sentry_client_tasks_failed_nonfatal);
+METRIC_DECLARE_counter(sentry_client_reconnections_succeeded);
+METRIC_DECLARE_counter(sentry_client_reconnections_failed);
+METRIC_DECLARE_histogram(sentry_client_task_execution_time_us);
+
+using kudu::sentry::AuthorizableScopesSet;
+using kudu::sentry::SentryAction;
+using kudu::sentry::SentryActionsSet;
+using kudu::sentry::SentryTestBase;
+using kudu::sentry::SentryAuthorizableScope;
 using sentry::TSentryAuthorizable;
 using sentry::TSentryGrantOption;
 using sentry::TSentryPrivilege;
@@ -60,13 +76,6 @@ using std::vector;
 using strings::Substitute;
 
 namespace kudu {
-
-using sentry::AuthorizableScopesSet;
-using sentry::SentryAction;
-using sentry::SentryActionsSet;
-using sentry::SentryTestBase;
-using sentry::SentryAuthorizableScope;
-
 namespace master {
 
 TEST(SentryAuthzProviderStaticTest, TestTrustedUserAcl) {
@@ -151,10 +160,13 @@ class SentryAuthzProviderTest : public SentryTestBase {
   void SetUp() override {
     SentryTestBase::SetUp();
 
+    metric_entity_ = METRIC_ENTITY_server.Instantiate(
+        &metric_registry_, "sentry_auth_provider-test");
+
     // Configure the SentryAuthzProvider flags.
     FLAGS_sentry_service_security_mode = KerberosEnabled() ? "kerberos" : "none";
     FLAGS_sentry_service_rpc_addresses = sentry_->address().ToString();
-    sentry_authz_provider_.reset(new SentryAuthzProvider());
+    sentry_authz_provider_.reset(new SentryAuthzProvider(metric_entity_));
     ASSERT_OK(sentry_authz_provider_->Start());
   }
 
@@ -174,7 +186,23 @@ class SentryAuthzProviderTest : public SentryTestBase {
     return false;
   }
 
+#define GET_GAUGE_READINGS(func_name, counter_name_suffix) \
+  int64_t func_name() { \
+    scoped_refptr<Counter> gauge(metric_entity_->FindOrCreateCounter( \
+        &METRIC_sentry_client_##counter_name_suffix)); \
+    CHECK(gauge); \
+    return gauge->value(); \
+  }
+  GET_GAUGE_READINGS(GetTasksSuccessful, tasks_successful)
+  GET_GAUGE_READINGS(GetTasksFailedFatal, tasks_failed_fatal)
+  GET_GAUGE_READINGS(GetTasksFailedNonFatal, tasks_failed_nonfatal)
+  GET_GAUGE_READINGS(GetReconnectionsSucceeded, reconnections_succeeded)
+  GET_GAUGE_READINGS(GetReconnectionsFailed, reconnections_failed)
+#undef GET_GAUGE_READINGS
+
  protected:
+  MetricRegistry metric_registry_;
+  scoped_refptr<MetricEntity> metric_entity_;
   unique_ptr<SentryAuthzProvider> sentry_authz_provider_;
 };
 
@@ -665,5 +693,91 @@ INSTANTIATE_TEST_CASE_P(AuthzCombinations, TestAuthzHierarchy,
                                          SentryAuthorizableScope::Scope::DATABASE,
                                          SentryAuthorizableScope::Scope::TABLE)));
 
+// Test to verify the functionality of metrics in HA Sentry client used in
+// SentryAuthzProvider to communicate with Sentry.
+class TestSentryClientMetrics : public SentryAuthzProviderTest {
+ public:
+  bool KerberosEnabled() const {
+    return false;
+  }
+};
+
+TEST_F(TestSentryClientMetrics, Basic) {
+  ASSERT_EQ(0, GetTasksSuccessful());
+  ASSERT_EQ(0, GetTasksFailedFatal());
+  ASSERT_EQ(0, GetTasksFailedNonFatal());
+  ASSERT_EQ(0, GetReconnectionsSucceeded());
+  ASSERT_EQ(0, GetReconnectionsFailed());
+
+  Status s = sentry_authz_provider_->AuthorizeCreateTable("db.table",
+                                                          kTestUser, kTestUser);
+  // The call should be counted as successful, and the client should connect
+  // to Sentry (counted as reconnect).
+  ASSERT_TRUE(s.IsNotAuthorized()) << s.ToString();
+  ASSERT_EQ(1, GetTasksSuccessful());
+  ASSERT_EQ(0, GetTasksFailedFatal());
+  ASSERT_EQ(0, GetTasksFailedNonFatal());
+  ASSERT_EQ(1, GetReconnectionsSucceeded());
+  ASSERT_EQ(0, GetReconnectionsFailed());
+
+  // Stop Sentry, and try the same call again. There should be a fatal error
+  // reported, and then there should be failed reconnection attempts.
+  ASSERT_OK(StopSentry());
+  s = sentry_authz_provider_->AuthorizeCreateTable("db.table",
+                                                   kTestUser, kTestUser);
+  ASSERT_TRUE(s.IsNetworkError()) << s.ToString();
+  ASSERT_EQ(1, GetTasksSuccessful());
+  ASSERT_EQ(1, GetTasksFailedFatal());
+  ASSERT_EQ(0, GetTasksFailedNonFatal());
+  ASSERT_LE(1, GetReconnectionsFailed());
+  ASSERT_EQ(1, GetReconnectionsSucceeded());
+
+  ASSERT_OK(StartSentry());
+  s = sentry_authz_provider_->AuthorizeCreateTable("db.table",
+                                                   kTestUser, kTestUser);
+  ASSERT_TRUE(s.IsNotAuthorized()) << s.ToString();
+  ASSERT_EQ(2, GetTasksSuccessful());
+  ASSERT_EQ(1, GetTasksFailedFatal());
+  ASSERT_EQ(0, GetTasksFailedNonFatal());
+  ASSERT_EQ(2, GetReconnectionsSucceeded());
+
+  // NotAuthorized() from Sentry itself considered as a fatal error.
+  // TODO(KUDU-2769): clarify whether it is a bug in HaClient or Sentry itself?
+  s = sentry_authz_provider_->AuthorizeCreateTable("db.table",
+                                                   "nobody", "nobody");
+  ASSERT_TRUE(s.IsNotAuthorized()) << s.ToString();
+  ASSERT_EQ(2, GetTasksSuccessful());
+  ASSERT_EQ(2, GetTasksFailedFatal());
+  ASSERT_EQ(0, GetTasksFailedNonFatal());
+  // Once a new fatal error is registered, the client should reconnect.
+  ASSERT_EQ(3, GetReconnectionsSucceeded());
+
+  // Shorten the default timeout parameters: make timeout interval shorter.
+  NO_FATALS(sentry_authz_provider_->Stop());
+  FLAGS_sentry_service_rpc_addresses = sentry_->address().ToString();
+  FLAGS_sentry_service_send_timeout_seconds = 2;
+  FLAGS_sentry_service_recv_timeout_seconds = 2;
+  sentry_authz_provider_.reset(new SentryAuthzProvider(metric_entity_));
+  ASSERT_OK(sentry_authz_provider_->Start());
+
+  ASSERT_OK(CreateRoleAndAddToGroups(sentry_client_.get(), kRoleName, kUserGroup));
+  const auto privilege = GetDatabasePrivilege("db", "create");
+  ASSERT_OK(AlterRoleGrantPrivilege(sentry_client_.get(), kRoleName, privilege));
+
+  // Pause Sentry and try to send an RPC, expecting it to time out.
+  ASSERT_OK(sentry_->Pause());
+  s = sentry_authz_provider_->AuthorizeCreateTable(
+      "db.table", kTestUser, kTestUser);
+  ASSERT_TRUE(s.IsTimedOut());
+  ASSERT_OK(sentry_->Resume());
+
+  scoped_refptr<Histogram> hist(metric_entity_->FindOrCreateHistogram(
+      &METRIC_sentry_client_task_execution_time_us));
+  ASSERT_LT(0, hist->histogram()->MinValue());
+  ASSERT_LT(2000000, hist->histogram()->MaxValue());
+  ASSERT_LE(5, hist->histogram()->TotalCount());
+  ASSERT_LT(2000000, hist->histogram()->TotalSum());
+}
+
 } // namespace master
 } // namespace kudu
diff --git a/src/kudu/master/sentry_authz_provider.cc b/src/kudu/master/sentry_authz_provider.cc
index a2f5840..187661a 100644
--- a/src/kudu/master/sentry_authz_provider.cc
+++ b/src/kudu/master/sentry_authz_provider.cc
@@ -17,6 +17,7 @@
 
 #include "kudu/master/sentry_authz_provider.h"
 
+#include <memory>
 #include <ostream>
 #include <type_traits>
 #include <unordered_map>
@@ -31,10 +32,12 @@
 #include "kudu/gutil/macros.h"
 #include "kudu/gutil/map-util.h"
 #include "kudu/gutil/strings/substitute.h"
+#include "kudu/master/sentry_client_metrics.h"
 #include "kudu/sentry/sentry_action.h"
 #include "kudu/sentry/sentry_client.h"
 #include "kudu/sentry/sentry_policy_service_types.h"
 #include "kudu/thrift/client.h"
+#include "kudu/thrift/ha_client_metrics.h"
 #include "kudu/util/flag_tags.h"
 #include "kudu/util/monotime.h"
 #include "kudu/util/net/net_util.h"
@@ -183,6 +186,16 @@ bool SentryPrivilegesBranch::Implies(SentryAuthorizableScope::Scope required_sco
   return false;
 }
 
+SentryAuthzProvider::SentryAuthzProvider(
+    scoped_refptr<MetricEntity> metric_entity)
+    : metric_entity_(std::move(metric_entity)) {
+  if (metric_entity_) {
+    std::unique_ptr<SentryClientMetrics> metrics(
+        new SentryClientMetrics(metric_entity_));
+    ha_client_.SetMetrics(std::move(metrics));
+  }
+}
+
 SentryAuthzProvider::~SentryAuthzProvider() {
   Stop();
 }
diff --git a/src/kudu/master/sentry_authz_provider.h b/src/kudu/master/sentry_authz_provider.h
index 51d5cc1..4be72b3 100644
--- a/src/kudu/master/sentry_authz_provider.h
+++ b/src/kudu/master/sentry_authz_provider.h
@@ -27,11 +27,13 @@
 
 #include "kudu/gutil/macros.h"
 #include "kudu/gutil/port.h"
+#include "kudu/gutil/ref_counted.h"
 #include "kudu/master/authz_provider.h"
 #include "kudu/sentry/sentry_action.h"
 #include "kudu/sentry/sentry_authorizable_scope.h"
 #include "kudu/sentry/sentry_client.h"
 #include "kudu/thrift/client.h"
+#include "kudu/util/metrics.h"
 #include "kudu/util/status.h"
 
 namespace sentry {
@@ -113,6 +115,7 @@ struct SentryPrivilegesBranch {
 // This class is thread-safe after Start() is called.
 class SentryAuthzProvider : public AuthzProvider {
  public:
+  explicit SentryAuthzProvider(scoped_refptr<MetricEntity> metric_entity = {});
 
   ~SentryAuthzProvider();
 
@@ -201,6 +204,7 @@ class SentryAuthzProvider : public AuthzProvider {
                    const std::string& user,
                    bool require_grant_option = false);
 
+  scoped_refptr<MetricEntity> metric_entity_;
   thrift::HaClient<sentry::SentryClient> ha_client_;
 };
 
diff --git a/src/kudu/master/sentry_client_metrics.cc b/src/kudu/master/sentry_client_metrics.cc
new file mode 100644
index 0000000..be593d2
--- /dev/null
+++ b/src/kudu/master/sentry_client_metrics.cc
@@ -0,0 +1,58 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "kudu/master/sentry_client_metrics.h"
+
+#include "kudu/util/metrics.h"
+
+METRIC_DEFINE_counter(server, sentry_client_tasks_successful,
+                      "Successful Tasks", kudu::MetricUnit::kTasks,
+                      "Number of successfully run tasks");
+METRIC_DEFINE_counter(server, sentry_client_tasks_failed_fatal,
+                      "Failed tasks (fatal)", kudu::MetricUnit::kTasks,
+                      "Number of tasks failed with fatal errors");
+METRIC_DEFINE_counter(server, sentry_client_tasks_failed_nonfatal,
+                      "Failed Tasks (nonfatal)", kudu::MetricUnit::kTasks,
+                      "Number of tasks failed with non-fatal errors");
+METRIC_DEFINE_counter(server, sentry_client_reconnections_succeeded,
+                      "Successful Reconnections", kudu::MetricUnit::kUnits,
+                      "Number of successful reconnections to Sentry");
+METRIC_DEFINE_counter(server, sentry_client_reconnections_failed,
+                      "Failed Reconnections", kudu::MetricUnit::kUnits,
+                      "Number of failed reconnections to Sentry");
+METRIC_DEFINE_histogram(server, sentry_client_task_execution_time_us,
+                        "Task Execution Time (us)",
+                        kudu::MetricUnit::kMicroseconds,
+                        "Duration of HaClient::Execute() calls (us)",
+                        60000000, 2);
+
+namespace kudu {
+namespace master {
+
+#define MINIT(member, x) member = METRIC_##x.Instantiate(e)
+SentryClientMetrics::SentryClientMetrics(const scoped_refptr<MetricEntity>& e) {
+  MINIT(tasks_successful, sentry_client_tasks_successful);
+  MINIT(tasks_failed_fatal, sentry_client_tasks_failed_fatal);
+  MINIT(tasks_failed_nonfatal, sentry_client_tasks_failed_nonfatal);
+  MINIT(reconnections_succeeded, sentry_client_reconnections_succeeded);
+  MINIT(reconnections_failed, sentry_client_reconnections_failed);
+  MINIT(task_execution_time_us, sentry_client_task_execution_time_us);
+}
+#undef MINIT
+
+} // namespace master
+} // namespace kudu
diff --git a/src/kudu/master/sentry_client_metrics.h b/src/kudu/master/sentry_client_metrics.h
new file mode 100644
index 0000000..39af49f
--- /dev/null
+++ b/src/kudu/master/sentry_client_metrics.h
@@ -0,0 +1,33 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "kudu/gutil/ref_counted.h"
+#include "kudu/thrift/ha_client_metrics.h"
+
+namespace kudu {
+
+class MetricEntity;
+
+namespace master {
+struct SentryClientMetrics : public thrift::HaClientMetrics {
+  explicit SentryClientMetrics(const scoped_refptr<MetricEntity>& e);
+};
+
+} // namespace master
+} // namespace kudu
diff --git a/src/kudu/thrift/client.h b/src/kudu/thrift/client.h
index b6ac791..d01f213 100644
--- a/src/kudu/thrift/client.h
+++ b/src/kudu/thrift/client.h
@@ -31,6 +31,7 @@
 #include "kudu/gutil/gscoped_ptr.h"
 #include "kudu/gutil/port.h"
 #include "kudu/gutil/strings/substitute.h"
+#include "kudu/thrift/ha_client_metrics.h"
 #include "kudu/util/async_util.h"
 #include "kudu/util/monotime.h"
 #include "kudu/util/net/net_util.h"
@@ -106,6 +107,9 @@ class HaClient {
   // Synchronously executes a task with exclusive access to the thrift service client.
   Status Execute(std::function<Status(Service*)> task) WARN_UNUSED_RESULT;
 
+  // Set metrics to account for various events.
+  void SetMetrics(std::unique_ptr<HaClientMetrics> metrics);
+
  private:
 
   // Reconnects to an instance of the Thrift service, or returns an error if all
@@ -127,6 +131,7 @@ class HaClient {
   Status reconnect_failure_;
   int consecutive_reconnect_failures_;
   int reconnect_idx_;
+  std::unique_ptr<HaClientMetrics> metrics_;
 };
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -180,6 +185,7 @@ void HaClient<Service>::Stop() {
 
 template<typename Service>
 Status HaClient<Service>::Execute(std::function<Status(Service*)> task) {
+  const MonoTime start_time(MonoTime::Now());
   Synchronizer synchronizer;
   auto callback = synchronizer.AsStdStatusCallback();
 
@@ -232,7 +238,14 @@ Status HaClient<Service>::Execute(std::function<Status(Service*)> task) {
 
         // Attempt to reconnect.
         Status reconnect_status = Reconnect();
-        if (!reconnect_status.ok()) {
+        if (reconnect_status.ok()) {
+          if (PREDICT_TRUE(metrics_)) {
+            metrics_->reconnections_succeeded->Increment();
+          }
+        } else {
+          if (PREDICT_TRUE(metrics_)) {
+            metrics_->reconnections_failed->Increment();
+          }
           // Reconnect failed; retry with exponential backoff capped at 10s and
           // fail the task. We don't bother with jitter here because only the
           // leader master should be attempting this in any given period per
@@ -253,6 +266,13 @@ Status HaClient<Service>::Execute(std::function<Status(Service*)> task) {
 
       // If the task succeeds, or it's a non-retriable error, return the result.
       if (task_status.ok() || !IsFatalError(task_status)) {
+        if (PREDICT_TRUE(metrics_)) {
+          if (task_status.ok()) {
+            metrics_->tasks_successful->Increment();
+          } else {
+            metrics_->tasks_failed_nonfatal->Increment();
+          }
+        }
         return callback(task_status);
       }
 
@@ -264,6 +284,9 @@ Status HaClient<Service>::Execute(std::function<Status(Service*)> task) {
 
       if (attempt == 0) {
         first_failure = std::move(task_status);
+        if (PREDICT_TRUE(metrics_)) {
+          metrics_->tasks_failed_fatal->Increment();
+        }
       }
 
       WARN_NOT_OK(service_client_.Stop(),
@@ -279,7 +302,17 @@ Status HaClient<Service>::Execute(std::function<Status(Service*)> task) {
     return callback(first_failure);
   }));
 
-  return synchronizer.Wait();
+  const auto s = synchronizer.Wait();
+  if (PREDICT_TRUE(metrics_)) {
+    metrics_->task_execution_time_us->Increment(
+        (MonoTime::Now() - start_time).ToMicroseconds());
+  }
+  return s;
+}
+
+template<typename Service>
+void HaClient<Service>::SetMetrics(std::unique_ptr<HaClientMetrics> metrics) {
+  metrics_ = std::move(metrics);
 }
 
 // Note: Thrift provides a handy TSocketPool class which could be useful in
diff --git a/src/kudu/thrift/ha_client_metrics.h b/src/kudu/thrift/ha_client_metrics.h
new file mode 100644
index 0000000..d468e7b
--- /dev/null
+++ b/src/kudu/thrift/ha_client_metrics.h
@@ -0,0 +1,41 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+
+#include "kudu/gutil/ref_counted.h"
+#include "kudu/util/metrics.h"
+
+namespace kudu {
+namespace thrift {
+
+struct HaClientMetrics {
+  virtual ~HaClientMetrics() = default;
+
+  scoped_refptr<Counter> tasks_successful;
+  scoped_refptr<Counter> tasks_failed_fatal;
+  scoped_refptr<Counter> tasks_failed_nonfatal;
+  scoped_refptr<Counter> reconnections_succeeded;
+  scoped_refptr<Counter> reconnections_failed;
+
+  scoped_refptr<Histogram> task_execution_time_us;
+};
+
+} // namespace thrift
+} // namespace kudu