You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pegasus.apache.org by wa...@apache.org on 2023/06/05 04:35:00 UTC
[incubator-pegasus] 22/32: feat(new_metrics): migrate metrics for replica_stub (part 3) (#1462)
This is an automated email from the ASF dual-hosted git repository.
wangdan pushed a commit to branch migrate-metrics-dev
in repository https://gitbox.apache.org/repos/asf/incubator-pegasus.git
commit 5861e04660ce888d631a8d824b28258d012526ef
Author: Dan Wang <wa...@apache.org>
AuthorDate: Thu Apr 27 11:02:54 2023 +0800
feat(new_metrics): migrate metrics for replica_stub (part 3) (#1462)
https://github.com/apache/incubator-pegasus/issues/1454
This is the 3rd part of migrating the metrics of replica_stub to the new framework.
During this migration, 3 metrics are changed from server-level to
replica-level: the number of failed RPC_PREPARE requests, the number of
failed RPC_GROUP_CHECK requests launched by primary replicas, and the
number of triggered emergency checkpoints.
Another 7 metrics are still kept server-level: the number of replicas whose
dirs are moved as error, the number of replicas whose dirs are moved as garbage,
and the numbers of removed replica dirs, error replica dirs (*.err), garbage
replica dirs (*.gar), tmp replica dirs (*.tmp) and origin replica dirs (*.ori);
the last two kinds of dirs are used for disk migration.
2 metrics are removed (the shared log size and the recent shared log write size), since both are related to the shared log.
---
src/nfs/nfs_client_impl.cpp | 6 +-
src/nfs/nfs_client_impl.h | 2 +-
src/nfs/nfs_server_impl.cpp | 6 +-
src/nfs/nfs_server_impl.h | 2 +-
src/replica/replica.cpp | 20 ++++-
src/replica/replica.h | 6 ++
src/replica/replica_2pc.cpp | 2 +-
src/replica/replica_check.cpp | 5 +-
src/replica/replica_chkpt.cpp | 7 +-
src/replica/replica_learn.cpp | 12 ---
src/replica/replica_stub.cpp | 136 ++++++++++++-----------------
src/replica/replica_stub.h | 21 ++---
src/server/pegasus_mutation_duplicator.cpp | 12 +--
src/server/pegasus_mutation_duplicator.h | 4 +-
src/utils/metrics.h | 4 +-
15 files changed, 113 insertions(+), 132 deletions(-)
diff --git a/src/nfs/nfs_client_impl.cpp b/src/nfs/nfs_client_impl.cpp
index 8ddadc46c..4c10b4da8 100644
--- a/src/nfs/nfs_client_impl.cpp
+++ b/src/nfs/nfs_client_impl.cpp
@@ -48,7 +48,7 @@ METRIC_DEFINE_counter(server,
"The accumulated data size in bytes requested by client during nfs copy");
METRIC_DEFINE_counter(server,
- nfs_client_failed_copy_requests,
+ nfs_client_copy_failed_requests,
dsn::metric_unit::kRequests,
"The number of failed nfs copy requests (requested by client)");
@@ -121,7 +121,7 @@ nfs_client_impl::nfs_client_impl()
_copy_requests_low(FLAGS_max_file_copy_request_count_per_file),
_high_priority_remaining_time(FLAGS_high_priority_speed_rate),
METRIC_VAR_INIT_server(nfs_client_copy_bytes),
- METRIC_VAR_INIT_server(nfs_client_failed_copy_requests),
+ METRIC_VAR_INIT_server(nfs_client_copy_failed_requests),
METRIC_VAR_INIT_server(nfs_client_write_bytes),
METRIC_VAR_INIT_server(nfs_client_failed_writes)
{
@@ -345,7 +345,7 @@ void nfs_client_impl::end_copy(::dsn::error_code err,
}
if (err != ::dsn::ERR_OK) {
- METRIC_VAR_INCREMENT(nfs_client_failed_copy_requests);
+ METRIC_VAR_INCREMENT(nfs_client_copy_failed_requests);
if (!fc->user_req->is_finished) {
if (reqc->retry_count > 0) {
diff --git a/src/nfs/nfs_client_impl.h b/src/nfs/nfs_client_impl.h
index 0c15fc8b3..183ac38a9 100644
--- a/src/nfs/nfs_client_impl.h
+++ b/src/nfs/nfs_client_impl.h
@@ -312,7 +312,7 @@ private:
std::deque<copy_request_ex_ptr> _local_writes;
METRIC_VAR_DECLARE_counter(nfs_client_copy_bytes);
- METRIC_VAR_DECLARE_counter(nfs_client_failed_copy_requests);
+ METRIC_VAR_DECLARE_counter(nfs_client_copy_failed_requests);
METRIC_VAR_DECLARE_counter(nfs_client_write_bytes);
METRIC_VAR_DECLARE_counter(nfs_client_failed_writes);
diff --git a/src/nfs/nfs_server_impl.cpp b/src/nfs/nfs_server_impl.cpp
index 25632d4f9..ac2d6a14d 100644
--- a/src/nfs/nfs_server_impl.cpp
+++ b/src/nfs/nfs_server_impl.cpp
@@ -55,7 +55,7 @@ METRIC_DEFINE_counter(
METRIC_DEFINE_counter(
server,
- nfs_server_failed_copy_requests,
+ nfs_server_copy_failed_requests,
dsn::metric_unit::kRequests,
"The number of nfs copy requests (received by server) that fail to read local file in server");
@@ -77,7 +77,7 @@ DSN_DECLARE_int32(file_close_expire_time_ms);
nfs_service_impl::nfs_service_impl()
: ::dsn::serverlet<nfs_service_impl>("nfs"),
METRIC_VAR_INIT_server(nfs_server_copy_bytes),
- METRIC_VAR_INIT_server(nfs_server_failed_copy_requests)
+ METRIC_VAR_INIT_server(nfs_server_copy_failed_requests)
{
_file_close_timer = ::dsn::tasking::enqueue_timer(
LPC_NFS_FILE_CLOSE_TIMER,
@@ -167,7 +167,7 @@ void nfs_service_impl::internal_read_callback(error_code err, size_t sz, callbac
if (err != ERR_OK) {
LOG_ERROR("[nfs_service] read file {} failed, err = {}", cp.file_path, err);
- METRIC_VAR_INCREMENT(nfs_server_failed_copy_requests);
+ METRIC_VAR_INCREMENT(nfs_server_copy_failed_requests);
} else {
METRIC_VAR_INCREMENT_BY(nfs_server_copy_bytes, sz);
}
diff --git a/src/nfs/nfs_server_impl.h b/src/nfs/nfs_server_impl.h
index 4c07a4996..4a4c5b5c4 100644
--- a/src/nfs/nfs_server_impl.h
+++ b/src/nfs/nfs_server_impl.h
@@ -138,7 +138,7 @@ private:
_send_token_buckets; // rate limiter of send to remote
METRIC_VAR_DECLARE_counter(nfs_server_copy_bytes);
- METRIC_VAR_DECLARE_counter(nfs_server_failed_copy_requests);
+ METRIC_VAR_DECLARE_counter(nfs_server_copy_failed_requests);
std::unique_ptr<command_deregister> _nfs_max_send_rate_megabytes_cmd;
diff --git a/src/replica/replica.cpp b/src/replica/replica.cpp
index ee6592e34..7ecad20c2 100644
--- a/src/replica/replica.cpp
+++ b/src/replica/replica.cpp
@@ -189,6 +189,21 @@ METRIC_DEFINE_counter(replica,
dsn::metric_unit::kLearns,
"The number of successful learns launched by learner");
+METRIC_DEFINE_counter(replica,
+ prepare_failed_requests,
+ dsn::metric_unit::kRequests,
+ "The number of failed RPC_PREPARE requests");
+
+METRIC_DEFINE_counter(replica,
+ group_check_failed_requests,
+ dsn::metric_unit::kRequests,
+ "The number of failed RPC_GROUP_CHECK requests launched by primary replicas");
+
+METRIC_DEFINE_counter(replica,
+ emergency_checkpoints,
+ dsn::metric_unit::kCheckpoints,
+ "The number of triggered emergency checkpoints");
+
namespace dsn {
namespace replication {
@@ -264,7 +279,10 @@ replica::replica(replica_stub *stub,
METRIC_VAR_INIT_replica(learn_lt_log_responses),
METRIC_VAR_INIT_replica(learn_resets),
METRIC_VAR_INIT_replica(learn_failed_count),
- METRIC_VAR_INIT_replica(learn_successful_count)
+ METRIC_VAR_INIT_replica(learn_successful_count),
+ METRIC_VAR_INIT_replica(prepare_failed_requests),
+ METRIC_VAR_INIT_replica(group_check_failed_requests),
+ METRIC_VAR_INIT_replica(emergency_checkpoints)
{
CHECK(!_app_info.app_type.empty(), "");
CHECK_NOTNULL(stub, "");
diff --git a/src/replica/replica.h b/src/replica/replica.h
index b63073530..e270e3db4 100644
--- a/src/replica/replica.h
+++ b/src/replica/replica.h
@@ -679,6 +679,12 @@ private:
METRIC_VAR_DECLARE_counter(learn_failed_count);
METRIC_VAR_DECLARE_counter(learn_successful_count);
+ METRIC_VAR_DECLARE_counter(prepare_failed_requests);
+
+ METRIC_VAR_DECLARE_counter(group_check_failed_requests);
+
+ METRIC_VAR_DECLARE_counter(emergency_checkpoints);
+
dsn::task_tracker _tracker;
// the thread access checker
dsn::thread_access_checker _checker;
diff --git a/src/replica/replica_2pc.cpp b/src/replica/replica_2pc.cpp
index 6d05b5c2c..31a7c2dc6 100644
--- a/src/replica/replica_2pc.cpp
+++ b/src/replica/replica_2pc.cpp
@@ -765,7 +765,7 @@ void replica::on_prepare_reply(std::pair<mutation_ptr, partition_status::type> p
}
}
- _stub->_counter_replicas_recent_prepare_fail_count->increment();
+ METRIC_VAR_INCREMENT(prepare_failed_requests);
// make sure this is before any later commit ops
// because now commit ops may lead to new prepare ops
diff --git a/src/replica/replica_check.cpp b/src/replica/replica_check.cpp
index e978a91db..b3be3e1f8 100644
--- a/src/replica/replica_check.cpp
+++ b/src/replica/replica_check.cpp
@@ -49,8 +49,6 @@
#include "duplication/replica_duplicator_manager.h"
#include "metadata_types.h"
#include "mutation.h"
-#include "perf_counter/perf_counter.h"
-#include "perf_counter/perf_counter_wrapper.h"
#include "replica.h"
#include "replica/prepare_list.h"
#include "replica/replica_context.h"
@@ -66,6 +64,7 @@
#include "utils/fail_point.h"
#include "utils/flags.h"
#include "utils/fmt_logging.h"
+#include "utils/metrics.h"
#include "utils/string_view.h"
#include "utils/thread_access_checker.h"
@@ -254,7 +253,7 @@ void replica::on_group_check_reply(error_code err,
err = resp->err;
}
handle_remote_failure(req->config.status, req->node, err, "group check");
- _stub->_counter_replicas_recent_group_check_fail_count->increment();
+ METRIC_VAR_INCREMENT(group_check_failed_requests);
} else {
if (resp->learner_status_ == learner_status::LearningSucceeded &&
req->config.status == partition_status::PS_POTENTIAL_SECONDARY) {
diff --git a/src/replica/replica_chkpt.cpp b/src/replica/replica_chkpt.cpp
index 5985e5d8d..7c9f6f931 100644
--- a/src/replica/replica_chkpt.cpp
+++ b/src/replica/replica_chkpt.cpp
@@ -50,8 +50,6 @@
#include "duplication/replica_duplicator_manager.h"
#include "metadata_types.h"
#include "mutation_log.h"
-#include "perf_counter/perf_counter.h"
-#include "perf_counter/perf_counter_wrapper.h"
#include "replica.h"
#include "replica/prepare_list.h"
#include "replica/replica_context.h"
@@ -240,8 +238,9 @@ void replica::init_checkpoint(bool is_emergency)
0,
10_ms);
- if (is_emergency)
- _stub->_counter_recent_trigger_emergency_checkpoint_count->increment();
+ if (is_emergency) {
+ METRIC_VAR_INCREMENT(emergency_checkpoints);
+ }
}
// ThreadPool: THREAD_POOL_REPLICATION
diff --git a/src/replica/replica_learn.cpp b/src/replica/replica_learn.cpp
index cedce9858..75281f7d2 100644
--- a/src/replica/replica_learn.cpp
+++ b/src/replica/replica_learn.cpp
@@ -81,18 +81,6 @@
#include "utils/metrics.h"
#include "utils/thread_access_checker.h"
-METRIC_DECLARE_counter(learn_count);
-METRIC_DECLARE_counter(learn_rounds);
-METRIC_DECLARE_counter(learn_copy_files);
-METRIC_DECLARE_counter(learn_copy_file_bytes);
-METRIC_DECLARE_counter(learn_copy_buffer_bytes);
-METRIC_DECLARE_counter(learn_lt_cache_responses);
-METRIC_DECLARE_counter(learn_lt_app_responses);
-METRIC_DECLARE_counter(learn_lt_log_responses);
-METRIC_DECLARE_counter(learn_resets);
-METRIC_DECLARE_counter(learn_failed_count);
-METRIC_DECLARE_counter(learn_successful_count);
-
namespace dsn {
namespace replication {
diff --git a/src/replica/replica_stub.cpp b/src/replica/replica_stub.cpp
index a460c20ff..e6e671ba8 100644
--- a/src/replica/replica_stub.cpp
+++ b/src/replica/replica_stub.cpp
@@ -128,6 +128,41 @@ METRIC_DEFINE_gauge_int64(
dsn::metric_unit::kBytes,
"The max size of files that are copied from learnee among all learning replicas");
+METRIC_DEFINE_counter(server,
+ moved_error_replicas,
+ dsn::metric_unit::kReplicas,
+ "The number of replicas whose dirs are moved as error");
+
+METRIC_DEFINE_counter(server,
+ moved_garbage_replicas,
+ dsn::metric_unit::kReplicas,
+ "The number of replicas whose dirs are moved as garbage");
+
+METRIC_DEFINE_counter(server,
+ replica_removed_dirs,
+ dsn::metric_unit::kDirs,
+ "The number of removed replica dirs");
+
+METRIC_DEFINE_gauge_int64(server,
+ replica_error_dirs,
+ dsn::metric_unit::kDirs,
+ "The number of error replica dirs (*.err)");
+
+METRIC_DEFINE_gauge_int64(server,
+ replica_garbage_dirs,
+ dsn::metric_unit::kDirs,
+ "The number of garbage replica dirs (*.gar)");
+
+METRIC_DEFINE_gauge_int64(server,
+ replica_tmp_dirs,
+ dsn::metric_unit::kDirs,
+ "The number of tmp replica dirs (*.tmp) for disk migration");
+
+METRIC_DEFINE_gauge_int64(server,
+ replica_origin_dirs,
+ dsn::metric_unit::kDirs,
+ "The number of origin replica dirs (*.ori) for disk migration");
+
namespace dsn {
namespace replication {
DSN_DEFINE_bool(replication,
@@ -231,7 +266,14 @@ replica_stub::replica_stub(replica_state_subscriber subscriber /*= nullptr*/,
METRIC_VAR_INIT_server(closing_replicas),
METRIC_VAR_INIT_server(learning_replicas),
METRIC_VAR_INIT_server(learning_replicas_max_duration_ms),
- METRIC_VAR_INIT_server(learning_replicas_max_copy_file_bytes)
+ METRIC_VAR_INIT_server(learning_replicas_max_copy_file_bytes),
+ METRIC_VAR_INIT_server(moved_error_replicas),
+ METRIC_VAR_INIT_server(moved_garbage_replicas),
+ METRIC_VAR_INIT_server(replica_removed_dirs),
+ METRIC_VAR_INIT_server(replica_error_dirs),
+ METRIC_VAR_INIT_server(replica_garbage_dirs),
+ METRIC_VAR_INIT_server(replica_tmp_dirs),
+ METRIC_VAR_INIT_server(replica_origin_dirs)
{
#ifdef DSN_ENABLE_GPERF
_is_releasing_memory = false;
@@ -249,66 +291,6 @@ replica_stub::~replica_stub(void) { close(); }
void replica_stub::install_perf_counters()
{
- _counter_replicas_recent_prepare_fail_count.init_app_counter(
- "eon.replica_stub",
- "replicas.recent.prepare.fail.count",
- COUNTER_TYPE_VOLATILE_NUMBER,
- "prepare fail count in the recent period");
- _counter_replicas_recent_replica_move_error_count.init_app_counter(
- "eon.replica_stub",
- "replicas.recent.replica.move.error.count",
- COUNTER_TYPE_VOLATILE_NUMBER,
- "replica move to error count in the recent period");
- _counter_replicas_recent_replica_move_garbage_count.init_app_counter(
- "eon.replica_stub",
- "replicas.recent.replica.move.garbage.count",
- COUNTER_TYPE_VOLATILE_NUMBER,
- "replica move to garbage count in the recent period");
- _counter_replicas_recent_replica_remove_dir_count.init_app_counter(
- "eon.replica_stub",
- "replicas.recent.replica.remove.dir.count",
- COUNTER_TYPE_VOLATILE_NUMBER,
- "replica directory remove count in the recent period");
- _counter_replicas_error_replica_dir_count.init_app_counter(
- "eon.replica_stub",
- "replicas.error.replica.dir.count",
- COUNTER_TYPE_NUMBER,
- "error replica directory(*.err) count");
- _counter_replicas_garbage_replica_dir_count.init_app_counter(
- "eon.replica_stub",
- "replicas.garbage.replica.dir.count",
- COUNTER_TYPE_NUMBER,
- "garbage replica directory(*.gar) count");
- _counter_replicas_tmp_replica_dir_count.init_app_counter(
- "eon.replica_stub",
- "replicas.tmp.replica.dir.count",
- COUNTER_TYPE_NUMBER,
- "disk migration tmp replica directory(*.tmp) count");
- _counter_replicas_origin_replica_dir_count.init_app_counter(
- "eon.replica_stub",
- "replicas.origin.replica.dir.count",
- COUNTER_TYPE_NUMBER,
- "disk migration origin replica directory(.ori) count");
-
- _counter_replicas_recent_group_check_fail_count.init_app_counter(
- "eon.replica_stub",
- "replicas.recent.group.check.fail.count",
- COUNTER_TYPE_VOLATILE_NUMBER,
- "group check fail count in the recent period");
-
- _counter_shared_log_size.init_app_counter(
- "eon.replica_stub", "shared.log.size(MB)", COUNTER_TYPE_NUMBER, "shared log size(MB)");
- _counter_shared_log_recent_write_size.init_app_counter(
- "eon.replica_stub",
- "shared.log.recent.write.size",
- COUNTER_TYPE_VOLATILE_NUMBER,
- "shared log write size in the recent period");
- _counter_recent_trigger_emergency_checkpoint_count.init_app_counter(
- "eon.replica_stub",
- "recent.trigger.emergency.checkpoint.count",
- COUNTER_TYPE_VOLATILE_NUMBER,
- "trigger emergency checkpoint count in the recent period");
-
// <- Duplication Metrics ->
_counter_dup_confirmed_rate.init_app_counter("eon.replica_stub",
@@ -560,10 +542,8 @@ void replica_stub::initialize(const replication_options &opts, bool clear /* = f
_options.slog_dir = cdir;
// Initialize slog.
- _log = new mutation_log_shared(_options.slog_dir,
- FLAGS_log_shared_file_size_mb,
- FLAGS_log_shared_force_flush,
- &_counter_shared_log_recent_write_size);
+ _log = new mutation_log_shared(
+ _options.slog_dir, FLAGS_log_shared_file_size_mb, FLAGS_log_shared_force_flush);
LOG_INFO("slog_dir = {}", _options.slog_dir);
// Start to load replicas in available data directories.
@@ -670,7 +650,7 @@ void replica_stub::initialize(const replication_options &opts, bool clear /* = f
for (auto it = rps.begin(); it != rps.end(); ++it) {
it->second->close();
move_to_err_path(it->second->dir(), "initialize replica");
- _counter_replicas_recent_replica_move_error_count->increment();
+ METRIC_VAR_INCREMENT(moved_error_replicas);
}
rps.clear();
@@ -680,10 +660,8 @@ void replica_stub::initialize(const replication_options &opts, bool clear /* = f
CHECK(utils::filesystem::remove_path(_options.slog_dir),
"remove directory {} failed",
_options.slog_dir);
- _log = new mutation_log_shared(_options.slog_dir,
- FLAGS_log_shared_file_size_mb,
- FLAGS_log_shared_force_flush,
- &_counter_shared_log_recent_write_size);
+ _log = new mutation_log_shared(
+ _options.slog_dir, FLAGS_log_shared_file_size_mb, FLAGS_log_shared_force_flush);
CHECK_EQ_MSG(_log->open(nullptr, [this](error_code err) { this->handle_log_failure(err); }),
ERR_OK,
"restart log service failed");
@@ -1724,7 +1702,7 @@ void replica_stub::on_gc_replica(replica_stub_ptr this_, gpid id)
LOG_WARNING("gc_replica: replica_dir_op succeed to move directory '{}' to '{}'",
replica_path,
rename_path);
- _counter_replicas_recent_replica_move_garbage_count->increment();
+ METRIC_VAR_INCREMENT(moved_garbage_replicas);
}
}
@@ -1857,8 +1835,6 @@ void replica_stub::on_gc()
}
}
}
-
- _counter_shared_log_size->set(_log->total_size() / (1024 * 1024));
}
// statistic learning info
@@ -1940,11 +1916,11 @@ void replica_stub::on_disk_stat()
_fs_manager.update_disk_stat();
update_disk_holding_replicas();
- _counter_replicas_error_replica_dir_count->set(report.error_replica_count);
- _counter_replicas_garbage_replica_dir_count->set(report.garbage_replica_count);
- _counter_replicas_tmp_replica_dir_count->set(report.disk_migrate_tmp_count);
- _counter_replicas_origin_replica_dir_count->set(report.disk_migrate_origin_count);
- _counter_replicas_recent_replica_remove_dir_count->add(report.remove_dir_count);
+ METRIC_VAR_SET(replica_error_dirs, report.error_replica_count);
+ METRIC_VAR_SET(replica_garbage_dirs, report.garbage_replica_count);
+ METRIC_VAR_SET(replica_tmp_dirs, report.disk_migrate_tmp_count);
+ METRIC_VAR_SET(replica_origin_dirs, report.disk_migrate_origin_count);
+ METRIC_VAR_INCREMENT_BY(replica_removed_dirs, report.remove_dir_count);
LOG_INFO("finish to update disk stat, time_used_ns = {}", dsn_now_ns() - start);
}
@@ -2258,7 +2234,7 @@ replica *replica_stub::load_replica(dir_node *dn, const char *dir)
// clear work on failure
if (dsn::utils::filesystem::directory_exists(dir)) {
move_to_err_path(dir, "load replica");
- _counter_replicas_recent_replica_move_error_count->increment();
+ METRIC_VAR_INCREMENT(moved_error_replicas);
_fs_manager.remove_replica(pid);
}
@@ -2346,7 +2322,7 @@ void replica_stub::close_replica(replica_ptr r)
if (r->is_data_corrupted()) {
_fs_manager.remove_replica(id);
move_to_err_path(r->dir(), "trash replica");
- _counter_replicas_recent_replica_move_error_count->increment();
+ METRIC_VAR_INCREMENT(moved_error_replicas);
}
LOG_INFO("{}: finish to close replica", name);
diff --git a/src/replica/replica_stub.h b/src/replica/replica_stub.h
index b999da48f..5b9dc967f 100644
--- a/src/replica/replica_stub.h
+++ b/src/replica/replica_stub.h
@@ -503,20 +503,13 @@ private:
METRIC_VAR_DECLARE_gauge_int64(learning_replicas_max_duration_ms);
METRIC_VAR_DECLARE_gauge_int64(learning_replicas_max_copy_file_bytes);
- perf_counter_wrapper _counter_replicas_recent_prepare_fail_count;
- perf_counter_wrapper _counter_replicas_recent_replica_move_error_count;
- perf_counter_wrapper _counter_replicas_recent_replica_move_garbage_count;
- perf_counter_wrapper _counter_replicas_recent_replica_remove_dir_count;
- perf_counter_wrapper _counter_replicas_error_replica_dir_count;
- perf_counter_wrapper _counter_replicas_garbage_replica_dir_count;
- perf_counter_wrapper _counter_replicas_tmp_replica_dir_count;
- perf_counter_wrapper _counter_replicas_origin_replica_dir_count;
-
- perf_counter_wrapper _counter_replicas_recent_group_check_fail_count;
-
- perf_counter_wrapper _counter_shared_log_size;
- perf_counter_wrapper _counter_shared_log_recent_write_size;
- perf_counter_wrapper _counter_recent_trigger_emergency_checkpoint_count;
+ METRIC_VAR_DECLARE_counter(moved_error_replicas);
+ METRIC_VAR_DECLARE_counter(moved_garbage_replicas);
+ METRIC_VAR_DECLARE_counter(replica_removed_dirs);
+ METRIC_VAR_DECLARE_gauge_int64(replica_error_dirs);
+ METRIC_VAR_DECLARE_gauge_int64(replica_garbage_dirs);
+ METRIC_VAR_DECLARE_gauge_int64(replica_tmp_dirs);
+ METRIC_VAR_DECLARE_gauge_int64(replica_origin_dirs);
// <- Duplication Metrics ->
// TODO(wutao1): calculate the counters independently for each remote cluster
diff --git a/src/server/pegasus_mutation_duplicator.cpp b/src/server/pegasus_mutation_duplicator.cpp
index 8d87ce60b..74832d5e6 100644
--- a/src/server/pegasus_mutation_duplicator.cpp
+++ b/src/server/pegasus_mutation_duplicator.cpp
@@ -48,12 +48,12 @@
#include "utils/rand.h"
METRIC_DEFINE_counter(replica,
- successful_mutation_dup_requests,
+ mutation_dup_successful_requests,
dsn::metric_unit::kRequests,
"The number of successful DUPLICATE requests sent from mutation duplicator");
METRIC_DEFINE_counter(replica,
- failed_mutation_dup_requests,
+ mutation_dup_failed_requests,
dsn::metric_unit::kRequests,
"The number of failed DUPLICATE requests sent from mutation duplicator");
@@ -107,8 +107,8 @@ pegasus_mutation_duplicator::pegasus_mutation_duplicator(dsn::replication::repli
dsn::string_view app)
: mutation_duplicator(r),
_remote_cluster(remote_cluster),
- METRIC_VAR_INIT_replica(successful_mutation_dup_requests),
- METRIC_VAR_INIT_replica(failed_mutation_dup_requests)
+ METRIC_VAR_INIT_replica(mutation_dup_successful_requests),
+ METRIC_VAR_INIT_replica(mutation_dup_failed_requests)
{
// initialize pegasus-client when this class is first time used.
static __attribute__((unused)) bool _dummy = pegasus_client_factory::initialize(nullptr);
@@ -162,7 +162,7 @@ void pegasus_mutation_duplicator::on_duplicate_reply(uint64_t hash,
}
if (perr != PERR_OK || err != dsn::ERR_OK) {
- METRIC_VAR_INCREMENT(failed_mutation_dup_requests);
+ METRIC_VAR_INCREMENT(mutation_dup_failed_requests);
// randomly log the 1% of the failed duplicate rpc, because minor number of
// errors are acceptable.
@@ -175,7 +175,7 @@ void pegasus_mutation_duplicator::on_duplicate_reply(uint64_t hash,
// duplicating an illegal write to server is unacceptable, fail fast.
CHECK_NE_PREFIX_MSG(perr, PERR_INVALID_ARGUMENT, rpc.response().error_hint);
} else {
- METRIC_VAR_INCREMENT(successful_mutation_dup_requests);
+ METRIC_VAR_INCREMENT(mutation_dup_successful_requests);
_total_shipped_size +=
rpc.dsn_request()->header->body_length + rpc.dsn_request()->header->hdr_length;
}
diff --git a/src/server/pegasus_mutation_duplicator.h b/src/server/pegasus_mutation_duplicator.h
index 9a5aa086c..dfe126df7 100644
--- a/src/server/pegasus_mutation_duplicator.h
+++ b/src/server/pegasus_mutation_duplicator.h
@@ -89,8 +89,8 @@ private:
size_t _total_shipped_size{0};
- METRIC_VAR_DECLARE_counter(successful_mutation_dup_requests);
- METRIC_VAR_DECLARE_counter(failed_mutation_dup_requests);
+ METRIC_VAR_DECLARE_counter(mutation_dup_successful_requests);
+ METRIC_VAR_DECLARE_counter(mutation_dup_failed_requests);
};
// Decodes the binary `request_data` into write request in thrift struct, and
diff --git a/src/utils/metrics.h b/src/utils/metrics.h
index b5e31c050..ec2fb6977 100644
--- a/src/utils/metrics.h
+++ b/src/utils/metrics.h
@@ -652,8 +652,8 @@ enum class metric_unit : size_t
kMegaBytes,
kCapacityUnits,
kPercent,
- kPartitions,
kReplicas,
+ kPartitions,
kServers,
kRequests,
kResponses,
@@ -662,7 +662,9 @@ enum class metric_unit : size_t
kValues,
kKeys,
kFiles,
+ kDirs,
kAmplification,
+ kCheckpoints,
kFlushes,
kCompactions,
kWrites,
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pegasus.apache.org
For additional commands, e-mail: commits-help@pegasus.apache.org