You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by aw...@apache.org on 2019/04/09 20:39:11 UTC
[kudu] 03/04: [HaClient] introduce client metrics
This is an automated email from the ASF dual-hosted git repository.
awong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 9382d072111117dc7e9410e8eeb85781e9702384
Author: Alexey Serbin <al...@apache.org>
AuthorDate: Fri Apr 5 20:11:56 2019 -0700
[HaClient] introduce client metrics
Introduced metrics for HA Sentry client in SentryAuthzProvider.
Added corresponding test to verify how the newly introduced
metrics work.
Change-Id: I86e12ee6ee34f39c11f18b21e846cc5a1dee6b6f
Reviewed-on: http://gerrit.cloudera.org:8080/12951
Tested-by: Kudu Jenkins
Reviewed-by: Adar Dembo <ad...@cloudera.com>
Reviewed-by: Andrew Wong <aw...@cloudera.com>
---
src/kudu/master/CMakeLists.txt | 1 +
src/kudu/master/catalog_manager.cc | 11 ++-
src/kudu/master/sentry_authz_provider-test.cc | 130 ++++++++++++++++++++++++--
src/kudu/master/sentry_authz_provider.cc | 13 +++
src/kudu/master/sentry_authz_provider.h | 4 +
src/kudu/master/sentry_client_metrics.cc | 58 ++++++++++++
src/kudu/master/sentry_client_metrics.h | 33 +++++++
src/kudu/thrift/client.h | 37 +++++++-
src/kudu/thrift/ha_client_metrics.h | 41 ++++++++
9 files changed, 313 insertions(+), 15 deletions(-)
diff --git a/src/kudu/master/CMakeLists.txt b/src/kudu/master/CMakeLists.txt
index 6409499..1ada2e7 100644
--- a/src/kudu/master/CMakeLists.txt
+++ b/src/kudu/master/CMakeLists.txt
@@ -45,6 +45,7 @@ set(MASTER_SRCS
mini_master.cc
placement_policy.cc
sentry_authz_provider.cc
+ sentry_client_metrics.cc
sys_catalog.cc
ts_descriptor.cc
ts_manager.cc)
diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc
index b3480ea..ced919f 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -44,6 +44,7 @@
#include <algorithm>
#include <cstdint>
#include <cstdlib>
+#include <ctime>
#include <functional>
#include <iterator>
#include <map>
@@ -53,7 +54,6 @@
#include <ostream>
#include <set>
#include <string>
-#include <time.h>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
@@ -132,6 +132,7 @@
#include "kudu/util/fault_injection.h"
#include "kudu/util/flag_tags.h"
#include "kudu/util/logging.h"
+#include "kudu/util/metrics.h"
#include "kudu/util/monotime.h"
#include "kudu/util/mutex.h"
#include "kudu/util/pb_util.h"
@@ -726,7 +727,7 @@ Status CatalogManager::Init(bool is_first_run) {
RETURN_NOT_OK_PREPEND(sys_catalog_->WaitUntilRunning(),
"Failed waiting for the catalog tablet to run");
- authz_provider_.reset(new master::DefaultAuthzProvider());
+ authz_provider_.reset(new DefaultAuthzProvider);
if (hms::HmsCatalog::IsEnabled()) {
vector<HostPortPB> master_addrs_pb;
@@ -756,7 +757,7 @@ Status CatalogManager::Init(bool is_first_run) {
// Use SentryAuthzProvider when both Sentry and the HMS integration are enabled.
if (SentryAuthzProvider::IsEnabled()) {
- authz_provider_.reset(new master::SentryAuthzProvider());
+ authz_provider_.reset(new SentryAuthzProvider(master_->metric_entity()));
}
}
@@ -4603,7 +4604,7 @@ void CatalogManager::SendCreateTabletRequest(const scoped_refptr<TabletInfo>& ta
Status CatalogManager::BuildLocationsForTablet(
const scoped_refptr<TabletInfo>& tablet,
- master::ReplicaTypeFilter filter,
+ ReplicaTypeFilter filter,
TabletLocationsPB* locs_pb) {
TabletMetadataLock l_tablet(tablet.get(), LockMode::READ);
if (PREDICT_FALSE(l_tablet.data().is_deleted())) {
@@ -4674,7 +4675,7 @@ Status CatalogManager::BuildLocationsForTablet(
}
Status CatalogManager::GetTabletLocations(const string& tablet_id,
- master::ReplicaTypeFilter filter,
+ ReplicaTypeFilter filter,
TabletLocationsPB* locs_pb,
optional<const string&> user) {
leader_lock_.AssertAcquiredForReading();
diff --git a/src/kudu/master/sentry_authz_provider-test.cc b/src/kudu/master/sentry_authz_provider-test.cc
index 04aff81..3266823 100644
--- a/src/kudu/master/sentry_authz_provider-test.cc
+++ b/src/kudu/master/sentry_authz_provider-test.cc
@@ -17,6 +17,7 @@
#include "kudu/master/sentry_authz_provider.h"
+#include <cstdint>
#include <memory>
#include <ostream>
#include <string>
@@ -28,6 +29,7 @@
#include "kudu/gutil/macros.h"
#include "kudu/gutil/map-util.h"
+#include "kudu/gutil/ref_counted.h"
#include "kudu/gutil/strings/substitute.h"
#include "kudu/master/sentry_authz_provider-test-base.h"
#include "kudu/sentry/mini_sentry.h"
@@ -36,6 +38,8 @@
#include "kudu/sentry/sentry_authorizable_scope.h"
#include "kudu/sentry/sentry_client.h"
#include "kudu/sentry/sentry_policy_service_types.h"
+#include "kudu/util/hdr_histogram.h"
+#include "kudu/util/metrics.h"
#include "kudu/util/net/net_util.h"
#include "kudu/util/random.h"
#include "kudu/util/random_util.h"
@@ -50,6 +54,18 @@ DECLARE_string(sentry_service_security_mode);
DECLARE_string(server_name);
DECLARE_string(trusted_user_acl);
+METRIC_DECLARE_counter(sentry_client_tasks_successful);
+METRIC_DECLARE_counter(sentry_client_tasks_failed_fatal);
+METRIC_DECLARE_counter(sentry_client_tasks_failed_nonfatal);
+METRIC_DECLARE_counter(sentry_client_reconnections_succeeded);
+METRIC_DECLARE_counter(sentry_client_reconnections_failed);
+METRIC_DECLARE_histogram(sentry_client_task_execution_time_us);
+
+using kudu::sentry::AuthorizableScopesSet;
+using kudu::sentry::SentryAction;
+using kudu::sentry::SentryActionsSet;
+using kudu::sentry::SentryTestBase;
+using kudu::sentry::SentryAuthorizableScope;
using sentry::TSentryAuthorizable;
using sentry::TSentryGrantOption;
using sentry::TSentryPrivilege;
@@ -60,13 +76,6 @@ using std::vector;
using strings::Substitute;
namespace kudu {
-
-using sentry::AuthorizableScopesSet;
-using sentry::SentryAction;
-using sentry::SentryActionsSet;
-using sentry::SentryTestBase;
-using sentry::SentryAuthorizableScope;
-
namespace master {
TEST(SentryAuthzProviderStaticTest, TestTrustedUserAcl) {
@@ -151,10 +160,13 @@ class SentryAuthzProviderTest : public SentryTestBase {
void SetUp() override {
SentryTestBase::SetUp();
+ metric_entity_ = METRIC_ENTITY_server.Instantiate(
+ &metric_registry_, "sentry_auth_provider-test");
+
// Configure the SentryAuthzProvider flags.
FLAGS_sentry_service_security_mode = KerberosEnabled() ? "kerberos" : "none";
FLAGS_sentry_service_rpc_addresses = sentry_->address().ToString();
- sentry_authz_provider_.reset(new SentryAuthzProvider());
+ sentry_authz_provider_.reset(new SentryAuthzProvider(metric_entity_));
ASSERT_OK(sentry_authz_provider_->Start());
}
@@ -174,7 +186,23 @@ class SentryAuthzProviderTest : public SentryTestBase {
return false;
}
+#define GET_GAUGE_READINGS(func_name, counter_name_suffix) \
+ int64_t func_name() { \
+ scoped_refptr<Counter> gauge(metric_entity_->FindOrCreateCounter( \
+ &METRIC_sentry_client_##counter_name_suffix)); \
+ CHECK(gauge); \
+ return gauge->value(); \
+ }
+ GET_GAUGE_READINGS(GetTasksSuccessful, tasks_successful)
+ GET_GAUGE_READINGS(GetTasksFailedFatal, tasks_failed_fatal)
+ GET_GAUGE_READINGS(GetTasksFailedNonFatal, tasks_failed_nonfatal)
+ GET_GAUGE_READINGS(GetReconnectionsSucceeded, reconnections_succeeded)
+ GET_GAUGE_READINGS(GetReconnectionsFailed, reconnections_failed)
+#undef GET_GAUGE_READINGS
+
protected:
+ MetricRegistry metric_registry_;
+ scoped_refptr<MetricEntity> metric_entity_;
unique_ptr<SentryAuthzProvider> sentry_authz_provider_;
};
@@ -665,5 +693,91 @@ INSTANTIATE_TEST_CASE_P(AuthzCombinations, TestAuthzHierarchy,
SentryAuthorizableScope::Scope::DATABASE,
SentryAuthorizableScope::Scope::TABLE)));
+// Test to verify the functionality of metrics in HA Sentry client used in
+// SentryAuthzProvider to communicate with Sentry.
+class TestSentryClientMetrics : public SentryAuthzProviderTest {
+ public:
+ bool KerberosEnabled() const {
+ return false;
+ }
+};
+
+TEST_F(TestSentryClientMetrics, Basic) {
+ ASSERT_EQ(0, GetTasksSuccessful());
+ ASSERT_EQ(0, GetTasksFailedFatal());
+ ASSERT_EQ(0, GetTasksFailedNonFatal());
+ ASSERT_EQ(0, GetReconnectionsSucceeded());
+ ASSERT_EQ(0, GetReconnectionsFailed());
+
+ Status s = sentry_authz_provider_->AuthorizeCreateTable("db.table",
+ kTestUser, kTestUser);
+ // The call should be counted as successful, and the client should connect
+ // to Sentry (counted as reconnect).
+ ASSERT_TRUE(s.IsNotAuthorized()) << s.ToString();
+ ASSERT_EQ(1, GetTasksSuccessful());
+ ASSERT_EQ(0, GetTasksFailedFatal());
+ ASSERT_EQ(0, GetTasksFailedNonFatal());
+ ASSERT_EQ(1, GetReconnectionsSucceeded());
+ ASSERT_EQ(0, GetReconnectionsFailed());
+
+ // Stop Sentry, and try the same call again. There should be a fatal error
+ // reported, and then there should be failed reconnection attempts.
+ ASSERT_OK(StopSentry());
+ s = sentry_authz_provider_->AuthorizeCreateTable("db.table",
+ kTestUser, kTestUser);
+ ASSERT_TRUE(s.IsNetworkError()) << s.ToString();
+ ASSERT_EQ(1, GetTasksSuccessful());
+ ASSERT_EQ(1, GetTasksFailedFatal());
+ ASSERT_EQ(0, GetTasksFailedNonFatal());
+ ASSERT_LE(1, GetReconnectionsFailed());
+ ASSERT_EQ(1, GetReconnectionsSucceeded());
+
+ ASSERT_OK(StartSentry());
+ s = sentry_authz_provider_->AuthorizeCreateTable("db.table",
+ kTestUser, kTestUser);
+ ASSERT_TRUE(s.IsNotAuthorized()) << s.ToString();
+ ASSERT_EQ(2, GetTasksSuccessful());
+ ASSERT_EQ(1, GetTasksFailedFatal());
+ ASSERT_EQ(0, GetTasksFailedNonFatal());
+ ASSERT_EQ(2, GetReconnectionsSucceeded());
+
+ // NotAuthorized() from Sentry itself considered as a fatal error.
+ // TODO(KUDU-2769): clarify whether it is a bug in HaClient or Sentry itself?
+ s = sentry_authz_provider_->AuthorizeCreateTable("db.table",
+ "nobody", "nobody");
+ ASSERT_TRUE(s.IsNotAuthorized()) << s.ToString();
+ ASSERT_EQ(2, GetTasksSuccessful());
+ ASSERT_EQ(2, GetTasksFailedFatal());
+ ASSERT_EQ(0, GetTasksFailedNonFatal());
+ // Once a new fatal error is registered, the client should reconnect.
+ ASSERT_EQ(3, GetReconnectionsSucceeded());
+
+ // Shorten the default timeout parameters: make timeout interval shorter.
+ NO_FATALS(sentry_authz_provider_->Stop());
+ FLAGS_sentry_service_rpc_addresses = sentry_->address().ToString();
+ FLAGS_sentry_service_send_timeout_seconds = 2;
+ FLAGS_sentry_service_recv_timeout_seconds = 2;
+ sentry_authz_provider_.reset(new SentryAuthzProvider(metric_entity_));
+ ASSERT_OK(sentry_authz_provider_->Start());
+
+ ASSERT_OK(CreateRoleAndAddToGroups(sentry_client_.get(), kRoleName, kUserGroup));
+ const auto privilege = GetDatabasePrivilege("db", "create");
+ ASSERT_OK(AlterRoleGrantPrivilege(sentry_client_.get(), kRoleName, privilege));
+
+ // Pause Sentry and try to send an RPC, expecting it to time out.
+ ASSERT_OK(sentry_->Pause());
+ s = sentry_authz_provider_->AuthorizeCreateTable(
+ "db.table", kTestUser, kTestUser);
+ ASSERT_TRUE(s.IsTimedOut());
+ ASSERT_OK(sentry_->Resume());
+
+ scoped_refptr<Histogram> hist(metric_entity_->FindOrCreateHistogram(
+ &METRIC_sentry_client_task_execution_time_us));
+ ASSERT_LT(0, hist->histogram()->MinValue());
+ ASSERT_LT(2000000, hist->histogram()->MaxValue());
+ ASSERT_LE(5, hist->histogram()->TotalCount());
+ ASSERT_LT(2000000, hist->histogram()->TotalSum());
+}
+
} // namespace master
} // namespace kudu
diff --git a/src/kudu/master/sentry_authz_provider.cc b/src/kudu/master/sentry_authz_provider.cc
index a2f5840..187661a 100644
--- a/src/kudu/master/sentry_authz_provider.cc
+++ b/src/kudu/master/sentry_authz_provider.cc
@@ -17,6 +17,7 @@
#include "kudu/master/sentry_authz_provider.h"
+#include <memory>
#include <ostream>
#include <type_traits>
#include <unordered_map>
@@ -31,10 +32,12 @@
#include "kudu/gutil/macros.h"
#include "kudu/gutil/map-util.h"
#include "kudu/gutil/strings/substitute.h"
+#include "kudu/master/sentry_client_metrics.h"
#include "kudu/sentry/sentry_action.h"
#include "kudu/sentry/sentry_client.h"
#include "kudu/sentry/sentry_policy_service_types.h"
#include "kudu/thrift/client.h"
+#include "kudu/thrift/ha_client_metrics.h"
#include "kudu/util/flag_tags.h"
#include "kudu/util/monotime.h"
#include "kudu/util/net/net_util.h"
@@ -183,6 +186,16 @@ bool SentryPrivilegesBranch::Implies(SentryAuthorizableScope::Scope required_sco
return false;
}
+SentryAuthzProvider::SentryAuthzProvider(
+ scoped_refptr<MetricEntity> metric_entity)
+ : metric_entity_(std::move(metric_entity)) {
+ if (metric_entity_) {
+ std::unique_ptr<SentryClientMetrics> metrics(
+ new SentryClientMetrics(metric_entity_));
+ ha_client_.SetMetrics(std::move(metrics));
+ }
+}
+
SentryAuthzProvider::~SentryAuthzProvider() {
Stop();
}
diff --git a/src/kudu/master/sentry_authz_provider.h b/src/kudu/master/sentry_authz_provider.h
index 51d5cc1..4be72b3 100644
--- a/src/kudu/master/sentry_authz_provider.h
+++ b/src/kudu/master/sentry_authz_provider.h
@@ -27,11 +27,13 @@
#include "kudu/gutil/macros.h"
#include "kudu/gutil/port.h"
+#include "kudu/gutil/ref_counted.h"
#include "kudu/master/authz_provider.h"
#include "kudu/sentry/sentry_action.h"
#include "kudu/sentry/sentry_authorizable_scope.h"
#include "kudu/sentry/sentry_client.h"
#include "kudu/thrift/client.h"
+#include "kudu/util/metrics.h"
#include "kudu/util/status.h"
namespace sentry {
@@ -113,6 +115,7 @@ struct SentryPrivilegesBranch {
// This class is thread-safe after Start() is called.
class SentryAuthzProvider : public AuthzProvider {
public:
+ explicit SentryAuthzProvider(scoped_refptr<MetricEntity> metric_entity = {});
~SentryAuthzProvider();
@@ -201,6 +204,7 @@ class SentryAuthzProvider : public AuthzProvider {
const std::string& user,
bool require_grant_option = false);
+ scoped_refptr<MetricEntity> metric_entity_;
thrift::HaClient<sentry::SentryClient> ha_client_;
};
diff --git a/src/kudu/master/sentry_client_metrics.cc b/src/kudu/master/sentry_client_metrics.cc
new file mode 100644
index 0000000..be593d2
--- /dev/null
+++ b/src/kudu/master/sentry_client_metrics.cc
@@ -0,0 +1,58 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "kudu/master/sentry_client_metrics.h"
+
+#include "kudu/util/metrics.h"
+
+METRIC_DEFINE_counter(server, sentry_client_tasks_successful,
+ "Successful Tasks", kudu::MetricUnit::kTasks,
+ "Number of successfully run tasks");
+METRIC_DEFINE_counter(server, sentry_client_tasks_failed_fatal,
+ "Failed tasks (fatal)", kudu::MetricUnit::kTasks,
+ "Number of tasks failed with fatal errors");
+METRIC_DEFINE_counter(server, sentry_client_tasks_failed_nonfatal,
+ "Failed Tasks (nonfatal)", kudu::MetricUnit::kTasks,
+ "Number of tasks failed with non-fatal errors");
+METRIC_DEFINE_counter(server, sentry_client_reconnections_succeeded,
+ "Successful Reconnections", kudu::MetricUnit::kUnits,
+ "Number of successful reconnections to Sentry");
+METRIC_DEFINE_counter(server, sentry_client_reconnections_failed,
+ "Failed Reconnections", kudu::MetricUnit::kUnits,
+ "Number of failed reconnections to Sentry");
+METRIC_DEFINE_histogram(server, sentry_client_task_execution_time_us,
+ "Task Execution Time (us)",
+ kudu::MetricUnit::kMicroseconds,
+ "Duration of HaClient::Execute() calls (us)",
+ 60000000, 2);
+
+namespace kudu {
+namespace master {
+
+#define MINIT(member, x) member = METRIC_##x.Instantiate(e)
+SentryClientMetrics::SentryClientMetrics(const scoped_refptr<MetricEntity>& e) {
+ MINIT(tasks_successful, sentry_client_tasks_successful);
+ MINIT(tasks_failed_fatal, sentry_client_tasks_failed_fatal);
+ MINIT(tasks_failed_nonfatal, sentry_client_tasks_failed_nonfatal);
+ MINIT(reconnections_succeeded, sentry_client_reconnections_succeeded);
+ MINIT(reconnections_failed, sentry_client_reconnections_failed);
+ MINIT(task_execution_time_us, sentry_client_task_execution_time_us);
+}
+#undef MINIT
+
+} // namespace master
+} // namespace kudu
diff --git a/src/kudu/master/sentry_client_metrics.h b/src/kudu/master/sentry_client_metrics.h
new file mode 100644
index 0000000..39af49f
--- /dev/null
+++ b/src/kudu/master/sentry_client_metrics.h
@@ -0,0 +1,33 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "kudu/gutil/ref_counted.h"
+#include "kudu/thrift/ha_client_metrics.h"
+
+namespace kudu {
+
+class MetricEntity;
+
+namespace master {
+struct SentryClientMetrics : public thrift::HaClientMetrics {
+ explicit SentryClientMetrics(const scoped_refptr<MetricEntity>& e);
+};
+
+} // namespace master
+} // namespace kudu
diff --git a/src/kudu/thrift/client.h b/src/kudu/thrift/client.h
index b6ac791..d01f213 100644
--- a/src/kudu/thrift/client.h
+++ b/src/kudu/thrift/client.h
@@ -31,6 +31,7 @@
#include "kudu/gutil/gscoped_ptr.h"
#include "kudu/gutil/port.h"
#include "kudu/gutil/strings/substitute.h"
+#include "kudu/thrift/ha_client_metrics.h"
#include "kudu/util/async_util.h"
#include "kudu/util/monotime.h"
#include "kudu/util/net/net_util.h"
@@ -106,6 +107,9 @@ class HaClient {
// Synchronously executes a task with exclusive access to the thrift service client.
Status Execute(std::function<Status(Service*)> task) WARN_UNUSED_RESULT;
+ // Set metrics to account for various events.
+ void SetMetrics(std::unique_ptr<HaClientMetrics> metrics);
+
private:
// Reconnects to an instance of the Thrift service, or returns an error if all
@@ -127,6 +131,7 @@ class HaClient {
Status reconnect_failure_;
int consecutive_reconnect_failures_;
int reconnect_idx_;
+ std::unique_ptr<HaClientMetrics> metrics_;
};
///////////////////////////////////////////////////////////////////////////////
@@ -180,6 +185,7 @@ void HaClient<Service>::Stop() {
template<typename Service>
Status HaClient<Service>::Execute(std::function<Status(Service*)> task) {
+ const MonoTime start_time(MonoTime::Now());
Synchronizer synchronizer;
auto callback = synchronizer.AsStdStatusCallback();
@@ -232,7 +238,14 @@ Status HaClient<Service>::Execute(std::function<Status(Service*)> task) {
// Attempt to reconnect.
Status reconnect_status = Reconnect();
- if (!reconnect_status.ok()) {
+ if (reconnect_status.ok()) {
+ if (PREDICT_TRUE(metrics_)) {
+ metrics_->reconnections_succeeded->Increment();
+ }
+ } else {
+ if (PREDICT_TRUE(metrics_)) {
+ metrics_->reconnections_failed->Increment();
+ }
// Reconnect failed; retry with exponential backoff capped at 10s and
// fail the task. We don't bother with jitter here because only the
// leader master should be attempting this in any given period per
@@ -253,6 +266,13 @@ Status HaClient<Service>::Execute(std::function<Status(Service*)> task) {
// If the task succeeds, or it's a non-retriable error, return the result.
if (task_status.ok() || !IsFatalError(task_status)) {
+ if (PREDICT_TRUE(metrics_)) {
+ if (task_status.ok()) {
+ metrics_->tasks_successful->Increment();
+ } else {
+ metrics_->tasks_failed_nonfatal->Increment();
+ }
+ }
return callback(task_status);
}
@@ -264,6 +284,9 @@ Status HaClient<Service>::Execute(std::function<Status(Service*)> task) {
if (attempt == 0) {
first_failure = std::move(task_status);
+ if (PREDICT_TRUE(metrics_)) {
+ metrics_->tasks_failed_fatal->Increment();
+ }
}
WARN_NOT_OK(service_client_.Stop(),
@@ -279,7 +302,17 @@ Status HaClient<Service>::Execute(std::function<Status(Service*)> task) {
return callback(first_failure);
}));
- return synchronizer.Wait();
+ const auto s = synchronizer.Wait();
+ if (PREDICT_TRUE(metrics_)) {
+ metrics_->task_execution_time_us->Increment(
+ (MonoTime::Now() - start_time).ToMicroseconds());
+ }
+ return s;
+}
+
+template<typename Service>
+void HaClient<Service>::SetMetrics(std::unique_ptr<HaClientMetrics> metrics) {
+ metrics_ = std::move(metrics);
}
// Note: Thrift provides a handy TSocketPool class which could be useful in
diff --git a/src/kudu/thrift/ha_client_metrics.h b/src/kudu/thrift/ha_client_metrics.h
new file mode 100644
index 0000000..d468e7b
--- /dev/null
+++ b/src/kudu/thrift/ha_client_metrics.h
@@ -0,0 +1,41 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+
+#include "kudu/gutil/ref_counted.h"
+#include "kudu/util/metrics.h"
+
+namespace kudu {
+namespace thrift {
+
+struct HaClientMetrics {
+ virtual ~HaClientMetrics() = default;
+
+ scoped_refptr<Counter> tasks_successful;
+ scoped_refptr<Counter> tasks_failed_fatal;
+ scoped_refptr<Counter> tasks_failed_nonfatal;
+ scoped_refptr<Counter> reconnections_succeeded;
+ scoped_refptr<Counter> reconnections_failed;
+
+ scoped_refptr<Histogram> task_execution_time_us;
+};
+
+} // namespace thrift
+} // namespace kudu