You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pegasus.apache.org by wa...@apache.org on 2023/05/11 03:35:32 UTC

[incubator-pegasus] 22/24: feat(new_metrics): migrate metrics for replica_stub (part 3) (#1462)

This is an automated email from the ASF dual-hosted git repository.

wangdan pushed a commit to branch migrate-metrics-dev
in repository https://gitbox.apache.org/repos/asf/incubator-pegasus.git

commit 6fa7194f277da4e176f1a481a0c225698311b840
Author: Dan Wang <wa...@apache.org>
AuthorDate: Thu Apr 27 11:02:54 2023 +0800

    feat(new_metrics): migrate metrics for replica_stub (part 3) (#1462)
    
    https://github.com/apache/incubator-pegasus/issues/1454
    
    This is the 3rd part of migrating metrics of replica_stub to new framework.
    
    During this migration, 3 metrics are changed from server-level to
    replica-level: the number of failed RPC_PREPARE requests, the number of
    failed RPC_GROUP_CHECK requests launched by primary replicas, and the
    number of triggered emergency checkpoints.
    
    Another 7 metrics are kept server-level: the number of replicas whose dirs
    are moved as error, the number of replicas whose dirs are moved as garbage,
    the number of removed replica dirs, and the numbers of error replica dirs
    (*.err), garbage replica dirs (*.gar), and, for disk migration, tmp replica
    dirs (*.tmp) and origin replica dirs (*.ori).
    
    The other 2 metrics (the shared log size and its recent write size) are
    removed, since both are shared-log-related.
---
 src/nfs/nfs_client_impl.cpp                |   6 +-
 src/nfs/nfs_client_impl.h                  |   2 +-
 src/nfs/nfs_server_impl.cpp                |   6 +-
 src/nfs/nfs_server_impl.h                  |   2 +-
 src/replica/replica.cpp                    |  20 ++++-
 src/replica/replica.h                      |   6 ++
 src/replica/replica_2pc.cpp                |   2 +-
 src/replica/replica_check.cpp              |   5 +-
 src/replica/replica_chkpt.cpp              |   7 +-
 src/replica/replica_learn.cpp              |  12 ---
 src/replica/replica_stub.cpp               | 136 ++++++++++++-----------------
 src/replica/replica_stub.h                 |  21 ++---
 src/server/pegasus_mutation_duplicator.cpp |  12 +--
 src/server/pegasus_mutation_duplicator.h   |   4 +-
 src/utils/metrics.h                        |   4 +-
 15 files changed, 113 insertions(+), 132 deletions(-)

diff --git a/src/nfs/nfs_client_impl.cpp b/src/nfs/nfs_client_impl.cpp
index 8ddadc46c..4c10b4da8 100644
--- a/src/nfs/nfs_client_impl.cpp
+++ b/src/nfs/nfs_client_impl.cpp
@@ -48,7 +48,7 @@ METRIC_DEFINE_counter(server,
                       "The accumulated data size in bytes requested by client during nfs copy");
 
 METRIC_DEFINE_counter(server,
-                      nfs_client_failed_copy_requests,
+                      nfs_client_copy_failed_requests,
                       dsn::metric_unit::kRequests,
                       "The number of failed nfs copy requests (requested by client)");
 
@@ -121,7 +121,7 @@ nfs_client_impl::nfs_client_impl()
       _copy_requests_low(FLAGS_max_file_copy_request_count_per_file),
       _high_priority_remaining_time(FLAGS_high_priority_speed_rate),
       METRIC_VAR_INIT_server(nfs_client_copy_bytes),
-      METRIC_VAR_INIT_server(nfs_client_failed_copy_requests),
+      METRIC_VAR_INIT_server(nfs_client_copy_failed_requests),
       METRIC_VAR_INIT_server(nfs_client_write_bytes),
       METRIC_VAR_INIT_server(nfs_client_failed_writes)
 {
@@ -345,7 +345,7 @@ void nfs_client_impl::end_copy(::dsn::error_code err,
     }
 
     if (err != ::dsn::ERR_OK) {
-        METRIC_VAR_INCREMENT(nfs_client_failed_copy_requests);
+        METRIC_VAR_INCREMENT(nfs_client_copy_failed_requests);
 
         if (!fc->user_req->is_finished) {
             if (reqc->retry_count > 0) {
diff --git a/src/nfs/nfs_client_impl.h b/src/nfs/nfs_client_impl.h
index 0c15fc8b3..183ac38a9 100644
--- a/src/nfs/nfs_client_impl.h
+++ b/src/nfs/nfs_client_impl.h
@@ -312,7 +312,7 @@ private:
     std::deque<copy_request_ex_ptr> _local_writes;
 
     METRIC_VAR_DECLARE_counter(nfs_client_copy_bytes);
-    METRIC_VAR_DECLARE_counter(nfs_client_failed_copy_requests);
+    METRIC_VAR_DECLARE_counter(nfs_client_copy_failed_requests);
     METRIC_VAR_DECLARE_counter(nfs_client_write_bytes);
     METRIC_VAR_DECLARE_counter(nfs_client_failed_writes);
 
diff --git a/src/nfs/nfs_server_impl.cpp b/src/nfs/nfs_server_impl.cpp
index 25632d4f9..ac2d6a14d 100644
--- a/src/nfs/nfs_server_impl.cpp
+++ b/src/nfs/nfs_server_impl.cpp
@@ -55,7 +55,7 @@ METRIC_DEFINE_counter(
 
 METRIC_DEFINE_counter(
     server,
-    nfs_server_failed_copy_requests,
+    nfs_server_copy_failed_requests,
     dsn::metric_unit::kRequests,
     "The number of nfs copy requests (received by server) that fail to read local file in server");
 
@@ -77,7 +77,7 @@ DSN_DECLARE_int32(file_close_expire_time_ms);
 nfs_service_impl::nfs_service_impl()
     : ::dsn::serverlet<nfs_service_impl>("nfs"),
       METRIC_VAR_INIT_server(nfs_server_copy_bytes),
-      METRIC_VAR_INIT_server(nfs_server_failed_copy_requests)
+      METRIC_VAR_INIT_server(nfs_server_copy_failed_requests)
 {
     _file_close_timer = ::dsn::tasking::enqueue_timer(
         LPC_NFS_FILE_CLOSE_TIMER,
@@ -167,7 +167,7 @@ void nfs_service_impl::internal_read_callback(error_code err, size_t sz, callbac
 
     if (err != ERR_OK) {
         LOG_ERROR("[nfs_service] read file {} failed, err = {}", cp.file_path, err);
-        METRIC_VAR_INCREMENT(nfs_server_failed_copy_requests);
+        METRIC_VAR_INCREMENT(nfs_server_copy_failed_requests);
     } else {
         METRIC_VAR_INCREMENT_BY(nfs_server_copy_bytes, sz);
     }
diff --git a/src/nfs/nfs_server_impl.h b/src/nfs/nfs_server_impl.h
index 4c07a4996..4a4c5b5c4 100644
--- a/src/nfs/nfs_server_impl.h
+++ b/src/nfs/nfs_server_impl.h
@@ -138,7 +138,7 @@ private:
         _send_token_buckets; // rate limiter of send to remote
 
     METRIC_VAR_DECLARE_counter(nfs_server_copy_bytes);
-    METRIC_VAR_DECLARE_counter(nfs_server_failed_copy_requests);
+    METRIC_VAR_DECLARE_counter(nfs_server_copy_failed_requests);
 
     std::unique_ptr<command_deregister> _nfs_max_send_rate_megabytes_cmd;
 
diff --git a/src/replica/replica.cpp b/src/replica/replica.cpp
index 349bae53d..a660ab509 100644
--- a/src/replica/replica.cpp
+++ b/src/replica/replica.cpp
@@ -189,6 +189,21 @@ METRIC_DEFINE_counter(replica,
                       dsn::metric_unit::kLearns,
                       "The number of successful learns launched by learner");
 
+METRIC_DEFINE_counter(replica,
+                      prepare_failed_requests,
+                      dsn::metric_unit::kRequests,
+                      "The number of failed RPC_PREPARE requests");
+
+METRIC_DEFINE_counter(replica,
+                      group_check_failed_requests,
+                      dsn::metric_unit::kRequests,
+                      "The number of failed RPC_GROUP_CHECK requests launched by primary replicas");
+
+METRIC_DEFINE_counter(replica,
+                      emergency_checkpoints,
+                      dsn::metric_unit::kCheckpoints,
+                      "The number of triggered emergency checkpoints");
+
 namespace dsn {
 namespace replication {
 
@@ -264,7 +279,10 @@ replica::replica(replica_stub *stub,
       METRIC_VAR_INIT_replica(learn_lt_log_responses),
       METRIC_VAR_INIT_replica(learn_resets),
       METRIC_VAR_INIT_replica(learn_failed_count),
-      METRIC_VAR_INIT_replica(learn_successful_count)
+      METRIC_VAR_INIT_replica(learn_successful_count),
+      METRIC_VAR_INIT_replica(prepare_failed_requests),
+      METRIC_VAR_INIT_replica(group_check_failed_requests),
+      METRIC_VAR_INIT_replica(emergency_checkpoints)
 {
     CHECK(!_app_info.app_type.empty(), "");
     CHECK_NOTNULL(stub, "");
diff --git a/src/replica/replica.h b/src/replica/replica.h
index 25494fd2c..8feefb109 100644
--- a/src/replica/replica.h
+++ b/src/replica/replica.h
@@ -676,6 +676,12 @@ private:
     METRIC_VAR_DECLARE_counter(learn_failed_count);
     METRIC_VAR_DECLARE_counter(learn_successful_count);
 
+    METRIC_VAR_DECLARE_counter(prepare_failed_requests);
+
+    METRIC_VAR_DECLARE_counter(group_check_failed_requests);
+
+    METRIC_VAR_DECLARE_counter(emergency_checkpoints);
+
     dsn::task_tracker _tracker;
     // the thread access checker
     dsn::thread_access_checker _checker;
diff --git a/src/replica/replica_2pc.cpp b/src/replica/replica_2pc.cpp
index 9525466b2..970df6688 100644
--- a/src/replica/replica_2pc.cpp
+++ b/src/replica/replica_2pc.cpp
@@ -763,7 +763,7 @@ void replica::on_prepare_reply(std::pair<mutation_ptr, partition_status::type> p
             }
         }
 
-        _stub->_counter_replicas_recent_prepare_fail_count->increment();
+        METRIC_VAR_INCREMENT(prepare_failed_requests);
 
         // make sure this is before any later commit ops
         // because now commit ops may lead to new prepare ops
diff --git a/src/replica/replica_check.cpp b/src/replica/replica_check.cpp
index 8cd0325e2..59aece790 100644
--- a/src/replica/replica_check.cpp
+++ b/src/replica/replica_check.cpp
@@ -48,8 +48,6 @@
 #include "duplication/replica_duplicator_manager.h"
 #include "metadata_types.h"
 #include "mutation.h"
-#include "perf_counter/perf_counter.h"
-#include "perf_counter/perf_counter_wrapper.h"
 #include "replica.h"
 #include "replica/prepare_list.h"
 #include "replica/replica_context.h"
@@ -65,6 +63,7 @@
 #include "utils/fail_point.h"
 #include "utils/flags.h"
 #include "utils/fmt_logging.h"
+#include "utils/metrics.h"
 #include "utils/string_view.h"
 #include "utils/thread_access_checker.h"
 
@@ -253,7 +252,7 @@ void replica::on_group_check_reply(error_code err,
             err = resp->err;
         }
         handle_remote_failure(req->config.status, req->node, err, "group check");
-        _stub->_counter_replicas_recent_group_check_fail_count->increment();
+        METRIC_VAR_INCREMENT(group_check_failed_requests);
     } else {
         if (resp->learner_status_ == learner_status::LearningSucceeded &&
             req->config.status == partition_status::PS_POTENTIAL_SECONDARY) {
diff --git a/src/replica/replica_chkpt.cpp b/src/replica/replica_chkpt.cpp
index 5985e5d8d..7c9f6f931 100644
--- a/src/replica/replica_chkpt.cpp
+++ b/src/replica/replica_chkpt.cpp
@@ -50,8 +50,6 @@
 #include "duplication/replica_duplicator_manager.h"
 #include "metadata_types.h"
 #include "mutation_log.h"
-#include "perf_counter/perf_counter.h"
-#include "perf_counter/perf_counter_wrapper.h"
 #include "replica.h"
 #include "replica/prepare_list.h"
 #include "replica/replica_context.h"
@@ -240,8 +238,9 @@ void replica::init_checkpoint(bool is_emergency)
                      0,
                      10_ms);
 
-    if (is_emergency)
-        _stub->_counter_recent_trigger_emergency_checkpoint_count->increment();
+    if (is_emergency) {
+        METRIC_VAR_INCREMENT(emergency_checkpoints);
+    }
 }
 
 // ThreadPool: THREAD_POOL_REPLICATION
diff --git a/src/replica/replica_learn.cpp b/src/replica/replica_learn.cpp
index b123fbe83..193ace628 100644
--- a/src/replica/replica_learn.cpp
+++ b/src/replica/replica_learn.cpp
@@ -80,18 +80,6 @@
 #include "utils/metrics.h"
 #include "utils/thread_access_checker.h"
 
-METRIC_DECLARE_counter(learn_count);
-METRIC_DECLARE_counter(learn_rounds);
-METRIC_DECLARE_counter(learn_copy_files);
-METRIC_DECLARE_counter(learn_copy_file_bytes);
-METRIC_DECLARE_counter(learn_copy_buffer_bytes);
-METRIC_DECLARE_counter(learn_lt_cache_responses);
-METRIC_DECLARE_counter(learn_lt_app_responses);
-METRIC_DECLARE_counter(learn_lt_log_responses);
-METRIC_DECLARE_counter(learn_resets);
-METRIC_DECLARE_counter(learn_failed_count);
-METRIC_DECLARE_counter(learn_successful_count);
-
 namespace dsn {
 namespace replication {
 
diff --git a/src/replica/replica_stub.cpp b/src/replica/replica_stub.cpp
index 35c0b742d..48718952c 100644
--- a/src/replica/replica_stub.cpp
+++ b/src/replica/replica_stub.cpp
@@ -129,6 +129,41 @@ METRIC_DEFINE_gauge_int64(
     dsn::metric_unit::kBytes,
     "The max size of files that are copied from learnee among all learning replicas");
 
+METRIC_DEFINE_counter(server,
+                      moved_error_replicas,
+                      dsn::metric_unit::kReplicas,
+                      "The number of replicas whose dirs are moved as error");
+
+METRIC_DEFINE_counter(server,
+                      moved_garbage_replicas,
+                      dsn::metric_unit::kReplicas,
+                      "The number of replicas whose dirs are moved as garbage");
+
+METRIC_DEFINE_counter(server,
+                      replica_removed_dirs,
+                      dsn::metric_unit::kDirs,
+                      "The number of removed replica dirs");
+
+METRIC_DEFINE_gauge_int64(server,
+                          replica_error_dirs,
+                          dsn::metric_unit::kDirs,
+                          "The number of error replica dirs (*.err)");
+
+METRIC_DEFINE_gauge_int64(server,
+                          replica_garbage_dirs,
+                          dsn::metric_unit::kDirs,
+                          "The number of garbage replica dirs (*.gar)");
+
+METRIC_DEFINE_gauge_int64(server,
+                          replica_tmp_dirs,
+                          dsn::metric_unit::kDirs,
+                          "The number of tmp replica dirs (*.tmp) for disk migration");
+
+METRIC_DEFINE_gauge_int64(server,
+                          replica_origin_dirs,
+                          dsn::metric_unit::kDirs,
+                          "The number of origin replica dirs (*.ori) for disk migration");
+
 namespace dsn {
 namespace replication {
 
@@ -237,7 +272,14 @@ replica_stub::replica_stub(replica_state_subscriber subscriber /*= nullptr*/,
       METRIC_VAR_INIT_server(closing_replicas),
       METRIC_VAR_INIT_server(learning_replicas),
       METRIC_VAR_INIT_server(learning_replicas_max_duration_ms),
-      METRIC_VAR_INIT_server(learning_replicas_max_copy_file_bytes)
+      METRIC_VAR_INIT_server(learning_replicas_max_copy_file_bytes),
+      METRIC_VAR_INIT_server(moved_error_replicas),
+      METRIC_VAR_INIT_server(moved_garbage_replicas),
+      METRIC_VAR_INIT_server(replica_removed_dirs),
+      METRIC_VAR_INIT_server(replica_error_dirs),
+      METRIC_VAR_INIT_server(replica_garbage_dirs),
+      METRIC_VAR_INIT_server(replica_tmp_dirs),
+      METRIC_VAR_INIT_server(replica_origin_dirs)
 {
 #ifdef DSN_ENABLE_GPERF
     _is_releasing_memory = false;
@@ -255,66 +297,6 @@ replica_stub::~replica_stub(void) { close(); }
 
 void replica_stub::install_perf_counters()
 {
-    _counter_replicas_recent_prepare_fail_count.init_app_counter(
-        "eon.replica_stub",
-        "replicas.recent.prepare.fail.count",
-        COUNTER_TYPE_VOLATILE_NUMBER,
-        "prepare fail count in the recent period");
-    _counter_replicas_recent_replica_move_error_count.init_app_counter(
-        "eon.replica_stub",
-        "replicas.recent.replica.move.error.count",
-        COUNTER_TYPE_VOLATILE_NUMBER,
-        "replica move to error count in the recent period");
-    _counter_replicas_recent_replica_move_garbage_count.init_app_counter(
-        "eon.replica_stub",
-        "replicas.recent.replica.move.garbage.count",
-        COUNTER_TYPE_VOLATILE_NUMBER,
-        "replica move to garbage count in the recent period");
-    _counter_replicas_recent_replica_remove_dir_count.init_app_counter(
-        "eon.replica_stub",
-        "replicas.recent.replica.remove.dir.count",
-        COUNTER_TYPE_VOLATILE_NUMBER,
-        "replica directory remove count in the recent period");
-    _counter_replicas_error_replica_dir_count.init_app_counter(
-        "eon.replica_stub",
-        "replicas.error.replica.dir.count",
-        COUNTER_TYPE_NUMBER,
-        "error replica directory(*.err) count");
-    _counter_replicas_garbage_replica_dir_count.init_app_counter(
-        "eon.replica_stub",
-        "replicas.garbage.replica.dir.count",
-        COUNTER_TYPE_NUMBER,
-        "garbage replica directory(*.gar) count");
-    _counter_replicas_tmp_replica_dir_count.init_app_counter(
-        "eon.replica_stub",
-        "replicas.tmp.replica.dir.count",
-        COUNTER_TYPE_NUMBER,
-        "disk migration tmp replica directory(*.tmp) count");
-    _counter_replicas_origin_replica_dir_count.init_app_counter(
-        "eon.replica_stub",
-        "replicas.origin.replica.dir.count",
-        COUNTER_TYPE_NUMBER,
-        "disk migration origin replica directory(.ori) count");
-
-    _counter_replicas_recent_group_check_fail_count.init_app_counter(
-        "eon.replica_stub",
-        "replicas.recent.group.check.fail.count",
-        COUNTER_TYPE_VOLATILE_NUMBER,
-        "group check fail count in the recent period");
-
-    _counter_shared_log_size.init_app_counter(
-        "eon.replica_stub", "shared.log.size(MB)", COUNTER_TYPE_NUMBER, "shared log size(MB)");
-    _counter_shared_log_recent_write_size.init_app_counter(
-        "eon.replica_stub",
-        "shared.log.recent.write.size",
-        COUNTER_TYPE_VOLATILE_NUMBER,
-        "shared log write size in the recent period");
-    _counter_recent_trigger_emergency_checkpoint_count.init_app_counter(
-        "eon.replica_stub",
-        "recent.trigger.emergency.checkpoint.count",
-        COUNTER_TYPE_VOLATILE_NUMBER,
-        "trigger emergency checkpoint count in the recent period");
-
     // <- Duplication Metrics ->
 
     _counter_dup_confirmed_rate.init_app_counter("eon.replica_stub",
@@ -563,10 +545,8 @@ void replica_stub::initialize(const replication_options &opts, bool clear /* = f
     _options.slog_dir = cdir;
     initialize_fs_manager(_options.data_dirs, _options.data_dir_tags);
 
-    _log = new mutation_log_shared(_options.slog_dir,
-                                   FLAGS_log_shared_file_size_mb,
-                                   FLAGS_log_shared_force_flush,
-                                   &_counter_shared_log_recent_write_size);
+    _log = new mutation_log_shared(
+        _options.slog_dir, FLAGS_log_shared_file_size_mb, FLAGS_log_shared_force_flush);
     LOG_INFO("slog_dir = {}", _options.slog_dir);
 
     // init rps
@@ -670,7 +650,7 @@ void replica_stub::initialize(const replication_options &opts, bool clear /* = f
         for (auto it = rps.begin(); it != rps.end(); ++it) {
             it->second->close();
             move_to_err_path(it->second->dir(), "initialize replica");
-            _counter_replicas_recent_replica_move_error_count->increment();
+            METRIC_VAR_INCREMENT(moved_error_replicas);
         }
         rps.clear();
 
@@ -680,10 +660,8 @@ void replica_stub::initialize(const replication_options &opts, bool clear /* = f
         CHECK(utils::filesystem::remove_path(_options.slog_dir),
               "remove directory {} failed",
               _options.slog_dir);
-        _log = new mutation_log_shared(_options.slog_dir,
-                                       FLAGS_log_shared_file_size_mb,
-                                       FLAGS_log_shared_force_flush,
-                                       &_counter_shared_log_recent_write_size);
+        _log = new mutation_log_shared(
+            _options.slog_dir, FLAGS_log_shared_file_size_mb, FLAGS_log_shared_force_flush);
         CHECK_EQ_MSG(_log->open(nullptr, [this](error_code err) { this->handle_log_failure(err); }),
                      ERR_OK,
                      "restart log service failed");
@@ -1756,7 +1734,7 @@ void replica_stub::on_gc_replica(replica_stub_ptr this_, gpid id)
         LOG_WARNING("gc_replica: replica_dir_op succeed to move directory '{}' to '{}'",
                     replica_path,
                     rename_path);
-        _counter_replicas_recent_replica_move_garbage_count->increment();
+        METRIC_VAR_INCREMENT(moved_garbage_replicas);
     }
 }
 
@@ -1889,8 +1867,6 @@ void replica_stub::on_gc()
                 }
             }
         }
-
-        _counter_shared_log_size->set(_log->total_size() / (1024 * 1024));
     }
 
     // statistic learning info
@@ -1973,11 +1949,11 @@ void replica_stub::on_disk_stat()
     update_disk_holding_replicas();
     update_disks_status();
 
-    _counter_replicas_error_replica_dir_count->set(report.error_replica_count);
-    _counter_replicas_garbage_replica_dir_count->set(report.garbage_replica_count);
-    _counter_replicas_tmp_replica_dir_count->set(report.disk_migrate_tmp_count);
-    _counter_replicas_origin_replica_dir_count->set(report.disk_migrate_origin_count);
-    _counter_replicas_recent_replica_remove_dir_count->add(report.remove_dir_count);
+    METRIC_VAR_SET(replica_error_dirs, report.error_replica_count);
+    METRIC_VAR_SET(replica_garbage_dirs, report.garbage_replica_count);
+    METRIC_VAR_SET(replica_tmp_dirs, report.disk_migrate_tmp_count);
+    METRIC_VAR_SET(replica_origin_dirs, report.disk_migrate_origin_count);
+    METRIC_VAR_INCREMENT_BY(replica_removed_dirs, report.remove_dir_count);
 
     LOG_INFO("finish to update disk stat, time_used_ns = {}", dsn_now_ns() - start);
 }
@@ -2279,7 +2255,7 @@ replica *replica_stub::load_replica(const char *dir)
         // clear work on failure
         if (dsn::utils::filesystem::directory_exists(dir)) {
             move_to_err_path(dir, "load replica");
-            _counter_replicas_recent_replica_move_error_count->increment();
+            METRIC_VAR_INCREMENT(moved_error_replicas);
             _fs_manager.remove_replica(pid);
         }
 
@@ -2364,7 +2340,7 @@ void replica_stub::close_replica(replica_ptr r)
     if (r->is_data_corrupted()) {
         _fs_manager.remove_replica(id);
         move_to_err_path(r->dir(), "trash replica");
-        _counter_replicas_recent_replica_move_error_count->increment();
+        METRIC_VAR_INCREMENT(moved_error_replicas);
     }
 
     LOG_INFO("{}: finish to close replica", name);
diff --git a/src/replica/replica_stub.h b/src/replica/replica_stub.h
index adaef7220..7bcf4e68b 100644
--- a/src/replica/replica_stub.h
+++ b/src/replica/replica_stub.h
@@ -513,20 +513,13 @@ private:
     METRIC_VAR_DECLARE_gauge_int64(learning_replicas_max_duration_ms);
     METRIC_VAR_DECLARE_gauge_int64(learning_replicas_max_copy_file_bytes);
 
-    perf_counter_wrapper _counter_replicas_recent_prepare_fail_count;
-    perf_counter_wrapper _counter_replicas_recent_replica_move_error_count;
-    perf_counter_wrapper _counter_replicas_recent_replica_move_garbage_count;
-    perf_counter_wrapper _counter_replicas_recent_replica_remove_dir_count;
-    perf_counter_wrapper _counter_replicas_error_replica_dir_count;
-    perf_counter_wrapper _counter_replicas_garbage_replica_dir_count;
-    perf_counter_wrapper _counter_replicas_tmp_replica_dir_count;
-    perf_counter_wrapper _counter_replicas_origin_replica_dir_count;
-
-    perf_counter_wrapper _counter_replicas_recent_group_check_fail_count;
-
-    perf_counter_wrapper _counter_shared_log_size;
-    perf_counter_wrapper _counter_shared_log_recent_write_size;
-    perf_counter_wrapper _counter_recent_trigger_emergency_checkpoint_count;
+    METRIC_VAR_DECLARE_counter(moved_error_replicas);
+    METRIC_VAR_DECLARE_counter(moved_garbage_replicas);
+    METRIC_VAR_DECLARE_counter(replica_removed_dirs);
+    METRIC_VAR_DECLARE_gauge_int64(replica_error_dirs);
+    METRIC_VAR_DECLARE_gauge_int64(replica_garbage_dirs);
+    METRIC_VAR_DECLARE_gauge_int64(replica_tmp_dirs);
+    METRIC_VAR_DECLARE_gauge_int64(replica_origin_dirs);
 
     // <- Duplication Metrics ->
     // TODO(wutao1): calculate the counters independently for each remote cluster
diff --git a/src/server/pegasus_mutation_duplicator.cpp b/src/server/pegasus_mutation_duplicator.cpp
index 8d87ce60b..74832d5e6 100644
--- a/src/server/pegasus_mutation_duplicator.cpp
+++ b/src/server/pegasus_mutation_duplicator.cpp
@@ -48,12 +48,12 @@
 #include "utils/rand.h"
 
 METRIC_DEFINE_counter(replica,
-                      successful_mutation_dup_requests,
+                      mutation_dup_successful_requests,
                       dsn::metric_unit::kRequests,
                       "The number of successful DUPLICATE requests sent from mutation duplicator");
 
 METRIC_DEFINE_counter(replica,
-                      failed_mutation_dup_requests,
+                      mutation_dup_failed_requests,
                       dsn::metric_unit::kRequests,
                       "The number of failed DUPLICATE requests sent from mutation duplicator");
 
@@ -107,8 +107,8 @@ pegasus_mutation_duplicator::pegasus_mutation_duplicator(dsn::replication::repli
                                                          dsn::string_view app)
     : mutation_duplicator(r),
       _remote_cluster(remote_cluster),
-      METRIC_VAR_INIT_replica(successful_mutation_dup_requests),
-      METRIC_VAR_INIT_replica(failed_mutation_dup_requests)
+      METRIC_VAR_INIT_replica(mutation_dup_successful_requests),
+      METRIC_VAR_INIT_replica(mutation_dup_failed_requests)
 {
     // initialize pegasus-client when this class is first time used.
     static __attribute__((unused)) bool _dummy = pegasus_client_factory::initialize(nullptr);
@@ -162,7 +162,7 @@ void pegasus_mutation_duplicator::on_duplicate_reply(uint64_t hash,
     }
 
     if (perr != PERR_OK || err != dsn::ERR_OK) {
-        METRIC_VAR_INCREMENT(failed_mutation_dup_requests);
+        METRIC_VAR_INCREMENT(mutation_dup_failed_requests);
 
         // randomly log the 1% of the failed duplicate rpc, because minor number of
         // errors are acceptable.
@@ -175,7 +175,7 @@ void pegasus_mutation_duplicator::on_duplicate_reply(uint64_t hash,
         // duplicating an illegal write to server is unacceptable, fail fast.
         CHECK_NE_PREFIX_MSG(perr, PERR_INVALID_ARGUMENT, rpc.response().error_hint);
     } else {
-        METRIC_VAR_INCREMENT(successful_mutation_dup_requests);
+        METRIC_VAR_INCREMENT(mutation_dup_successful_requests);
         _total_shipped_size +=
             rpc.dsn_request()->header->body_length + rpc.dsn_request()->header->hdr_length;
     }
diff --git a/src/server/pegasus_mutation_duplicator.h b/src/server/pegasus_mutation_duplicator.h
index 9a5aa086c..dfe126df7 100644
--- a/src/server/pegasus_mutation_duplicator.h
+++ b/src/server/pegasus_mutation_duplicator.h
@@ -89,8 +89,8 @@ private:
 
     size_t _total_shipped_size{0};
 
-    METRIC_VAR_DECLARE_counter(successful_mutation_dup_requests);
-    METRIC_VAR_DECLARE_counter(failed_mutation_dup_requests);
+    METRIC_VAR_DECLARE_counter(mutation_dup_successful_requests);
+    METRIC_VAR_DECLARE_counter(mutation_dup_failed_requests);
 };
 
 // Decodes the binary `request_data` into write request in thrift struct, and
diff --git a/src/utils/metrics.h b/src/utils/metrics.h
index b5e31c050..ec2fb6977 100644
--- a/src/utils/metrics.h
+++ b/src/utils/metrics.h
@@ -652,8 +652,8 @@ enum class metric_unit : size_t
     kMegaBytes,
     kCapacityUnits,
     kPercent,
-    kPartitions,
     kReplicas,
+    kPartitions,
     kServers,
     kRequests,
     kResponses,
@@ -662,7 +662,9 @@ enum class metric_unit : size_t
     kValues,
     kKeys,
     kFiles,
+    kDirs,
     kAmplification,
+    kCheckpoints,
     kFlushes,
     kCompactions,
     kWrites,


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pegasus.apache.org
For additional commands, e-mail: commits-help@pegasus.apache.org