You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pegasus.apache.org by wa...@apache.org on 2023/05/17 08:48:15 UTC

[incubator-pegasus] 17/26: feat(new_metrics): migrate partition-level metrics for partition_guardian (#1440)

This is an automated email from the ASF dual-hosted git repository.

wangdan pushed a commit to branch migrate-metrics-dev
in repository https://gitbox.apache.org/repos/asf/incubator-pegasus.git

commit aad972fa00a7fd38f798695f9798373382076ae4
Author: Dan Wang <wa...@apache.org>
AuthorDate: Sun Apr 16 21:39:00 2023 +0800

    feat(new_metrics): migrate partition-level metrics for partition_guardian (#1440)
    
    https://github.com/apache/incubator-pegasus/issues/1331
    
    In perf counters, there's only one metric for partition_guardian, namely
    the number of operations that fail to choose the primary replica, which
    is server-level. It would be changed to partition-level in new metrics
    since this could give which partitions fail to choose primaries and how
    frequency those happen. Still, to compute table-level or server-level
    metrics just aggregate on partition-level ones.
---
 src/meta/partition_guardian.cpp | 14 ++++++--------
 src/meta/partition_guardian.h   |  2 --
 src/meta/table_metrics.cpp      |  8 +++++++-
 src/meta/table_metrics.h        |  5 +++++
 4 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/src/meta/partition_guardian.cpp b/src/meta/partition_guardian.cpp
index 09b0862c4..47d4ace37 100644
--- a/src/meta/partition_guardian.cpp
+++ b/src/meta/partition_guardian.cpp
@@ -29,9 +29,11 @@
 #include "meta/meta_data.h"
 #include "meta/meta_service.h"
 #include "meta/server_load_balancer.h"
-#include "perf_counter/perf_counter.h"
+#include "meta/server_state.h"
+#include "meta/table_metrics.h"
 #include "utils/flags.h"
 #include "utils/fmt_logging.h"
+#include "utils/metrics.h"
 #include "utils/string_conv.h"
 #include "utils/strings.h"
 #include "utils/time_utils.h"
@@ -53,12 +55,6 @@ partition_guardian::partition_guardian(meta_service *svc) : _svc(svc)
     } else {
         _replica_assign_delay_ms_for_dropouts = 0;
     }
-
-    _recent_choose_primary_fail_count.init_app_counter(
-        "eon.server_load_balancer",
-        "recent_choose_primary_fail_count",
-        COUNTER_TYPE_VOLATILE_NUMBER,
-        "choose primary fail count in the recent period");
 }
 
 pc_status partition_guardian::cure(meta_view view,
@@ -452,7 +448,9 @@ pc_status partition_guardian::on_missing_primary(meta_view &view, const dsn::gpi
             LOG_WARNING("{}: don't select any node for security reason, administrator can select "
                         "a proper one by shell",
                         gpid_name);
-            _recent_choose_primary_fail_count->increment();
+            METRIC_INCREMENT(_svc->get_server_state()->get_table_metric_entities(),
+                             choose_primary_failed_operations,
+                             gpid);
             ddd_partition_info pinfo;
             pinfo.config = pc;
             for (int i = 0; i < cc.dropped.size(); ++i) {
diff --git a/src/meta/partition_guardian.h b/src/meta/partition_guardian.h
index 23d78563a..9c77da7e5 100644
--- a/src/meta/partition_guardian.h
+++ b/src/meta/partition_guardian.h
@@ -29,7 +29,6 @@
 #include "dsn.layer2_types.h"
 #include "meta_admin_types.h"
 #include "meta_data.h"
-#include "perf_counter/perf_counter_wrapper.h"
 #include "runtime/rpc/rpc_address.h"
 #include "utils/command_manager.h"
 #include "utils/zlocks.h"
@@ -91,7 +90,6 @@ private:
     }
 
     meta_service *_svc;
-    perf_counter_wrapper _recent_choose_primary_fail_count;
 
     mutable zlock _ddd_partitions_lock; // [
     std::map<gpid, ddd_partition_info> _ddd_partitions;
diff --git a/src/meta/table_metrics.cpp b/src/meta/table_metrics.cpp
index 73b992062..c63dd1f09 100644
--- a/src/meta/table_metrics.cpp
+++ b/src/meta/table_metrics.cpp
@@ -65,6 +65,11 @@ METRIC_DEFINE_counter(partition,
                       dsn::metric_unit::kOperations,
                       "The number of balance operations by greedy balancer that copy secondaries");
 
+METRIC_DEFINE_counter(partition,
+                      choose_primary_failed_operations,
+                      dsn::metric_unit::kOperations,
+                      "The number of operations that fail to choose the primary replica");
+
 METRIC_DEFINE_entity(table);
 
 // The number of partitions in each status, see `health_status` and `partition_health_status()`
@@ -133,7 +138,8 @@ partition_metrics::partition_metrics(int32_t table_id, int32_t partition_id)
       METRIC_VAR_INIT_partition(greedy_recent_balance_operations),
       METRIC_VAR_INIT_partition(greedy_move_primary_operations),
       METRIC_VAR_INIT_partition(greedy_copy_primary_operations),
-      METRIC_VAR_INIT_partition(greedy_copy_secondary_operations)
+      METRIC_VAR_INIT_partition(greedy_copy_secondary_operations),
+      METRIC_VAR_INIT_partition(choose_primary_failed_operations)
 {
 }
 
diff --git a/src/meta/table_metrics.h b/src/meta/table_metrics.h
index 39fec611d..7dc5e83a3 100644
--- a/src/meta/table_metrics.h
+++ b/src/meta/table_metrics.h
@@ -52,6 +52,8 @@ public:
     METRIC_DEFINE_INCREMENT_BY(greedy_copy_primary_operations)
     METRIC_DEFINE_INCREMENT_BY(greedy_copy_secondary_operations)
 
+    METRIC_DEFINE_INCREMENT(choose_primary_failed_operations)
+
 private:
     const int32_t _table_id;
     const int32_t _partition_id;
@@ -64,6 +66,7 @@ private:
     METRIC_VAR_DECLARE_counter(greedy_move_primary_operations);
     METRIC_VAR_DECLARE_counter(greedy_copy_primary_operations);
     METRIC_VAR_DECLARE_counter(greedy_copy_secondary_operations);
+    METRIC_VAR_DECLARE_counter(choose_primary_failed_operations);
 
     DISALLOW_COPY_AND_ASSIGN(partition_metrics);
 };
@@ -112,6 +115,7 @@ public:
     __METRIC_DEFINE_INCREMENT(partition_configuration_changes)
     __METRIC_DEFINE_INCREMENT(unwritable_partition_changes)
     __METRIC_DEFINE_INCREMENT(writable_partition_changes)
+    __METRIC_DEFINE_INCREMENT(choose_primary_failed_operations)
 
 #undef __METRIC_DEFINE_INCREMENT
 
@@ -221,6 +225,7 @@ public:
     __METRIC_DEFINE_INCREMENT(partition_configuration_changes)
     __METRIC_DEFINE_INCREMENT(unwritable_partition_changes)
     __METRIC_DEFINE_INCREMENT(writable_partition_changes)
+    __METRIC_DEFINE_INCREMENT(choose_primary_failed_operations)
 
 #undef __METRIC_DEFINE_INCREMENT
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pegasus.apache.org
For additional commands, e-mail: commits-help@pegasus.apache.org