You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by ad...@apache.org on 2020/01/15 19:40:26 UTC

[kudu] branch master updated: [metrics] Fix bugs when metrics do merge

This is an automated email from the ASF dual-hosted git repository.

adar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new 926bca8  [metrics] Fix bugs when metrics do merge
926bca8 is described below

commit 926bca85827e597f2923652c3b3b017c7c071f3b
Author: Yingchun Lai <40...@qq.com>
AuthorDate: Sun Jan 12 17:53:29 2020 +0800

    [metrics] Fix bugs when metrics do merge
    
    1. METRIC_merged_entities_count_of_server should never retire.
    2. Add METRIC_merged_entities_count_of_tablet/table's ref counter
       to avoid retiring before its bounded MetricEntity destroyed.
    3. Metric's modification epoch should be updated when AtomicGauge
       set_value().
    4. Merge should abort if metrics are invalid for merging,
       FunctionGauge included.
    
    Change-Id: I36ee6244964820e3326742c6902a578bf23041d1
    Reviewed-on: http://gerrit.cloudera.org:8080/15014
    Tested-by: Kudu Jenkins
    Reviewed-by: Adar Dembo <ad...@cloudera.com>
---
 src/kudu/master/catalog_manager.cc | 1 -
 src/kudu/master/table_metrics.cc   | 8 ++++++--
 src/kudu/master/table_metrics.h    | 1 +
 src/kudu/server/server_base.cc     | 3 ++-
 src/kudu/tablet/tablet.cc          | 1 -
 src/kudu/tablet/tablet_metrics.cc  | 6 +++++-
 src/kudu/tablet/tablet_metrics.h   | 3 +++
 src/kudu/util/metrics.h            | 9 +++++++--
 8 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc
index a2bc94d..876e372 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -5626,7 +5626,6 @@ void TableInfo::RegisterMetrics(MetricRegistry* metric_registry, const string& t
     attrs["table_name"] = table_name;
     metric_entity_ = METRIC_ENTITY_table.Instantiate(metric_registry, table_id_, attrs);
     metrics_.reset(new TableMetrics(metric_entity_));
-    METRIC_merged_entities_count_of_table.InstantiateHidden(metric_entity_, 1);
   }
 }
 
diff --git a/src/kudu/master/table_metrics.cc b/src/kudu/master/table_metrics.cc
index cde684e..d67760a 100644
--- a/src/kudu/master/table_metrics.cc
+++ b/src/kudu/master/table_metrics.cc
@@ -20,6 +20,8 @@
 
 #include "kudu/gutil/map-util.h"
 
+METRIC_DECLARE_gauge_size(merged_entities_count_of_table);
+
 namespace kudu {
 namespace master {
 
@@ -36,12 +38,14 @@ METRIC_DEFINE_gauge_uint64(table, live_row_count, "Table Live Row count",
     kudu::MetricLevel::kInfo);
 
 #define GINIT(x) x(METRIC_##x.Instantiate(entity, 0))
-
+#define HIDEINIT(x, v) x(METRIC_##x.InstantiateHidden(entity, v))
 TableMetrics::TableMetrics(const scoped_refptr<MetricEntity>& entity)
   : GINIT(on_disk_size),
-    GINIT(live_row_count) {
+    GINIT(live_row_count),
+    HIDEINIT(merged_entities_count_of_table, 1) {
 }
 #undef GINIT
+#undef HIDEINIT
 
 void TableMetrics::AddTabletNoLiveRowCount(const std::string& tablet_id) {
   std::lock_guard<simple_spinlock> l(lock_);
diff --git a/src/kudu/master/table_metrics.h b/src/kudu/master/table_metrics.h
index f90f52e..1bb17f6 100644
--- a/src/kudu/master/table_metrics.h
+++ b/src/kudu/master/table_metrics.h
@@ -47,6 +47,7 @@ struct TableMetrics {
 
   scoped_refptr<AtomicGauge<uint64_t>> on_disk_size;
   scoped_refptr<AtomicGauge<uint64_t>> live_row_count;
+  scoped_refptr<AtomicGauge<uint64_t>> merged_entities_count_of_table;
 
   void AddTabletNoLiveRowCount(const std::string& tablet_id);
   void DeleteTabletNoLiveRowCount(const std::string& tablet_id);
diff --git a/src/kudu/server/server_base.cc b/src/kudu/server/server_base.cc
index c089eb5..74be49c 100644
--- a/src/kudu/server/server_base.cc
+++ b/src/kudu/server/server_base.cc
@@ -420,7 +420,8 @@ ServerBase::ServerBase(string name, const ServerBaseOptions& options,
           MonoDelta::FromSeconds(FLAGS_dns_resolver_cache_ttl_sec))),
       options_(options),
       stop_background_threads_latch_(1) {
-  METRIC_merged_entities_count_of_server.InstantiateHidden(metric_entity_, 1);
+  metric_entity_->NeverRetire(
+      METRIC_merged_entities_count_of_server.InstantiateHidden(metric_entity_, 1));
 
   FsManagerOpts fs_opts;
   fs_opts.metric_entity = metric_entity_;
diff --git a/src/kudu/tablet/tablet.cc b/src/kudu/tablet/tablet.cc
index f20f2a3..dd85ba2 100644
--- a/src/kudu/tablet/tablet.cc
+++ b/src/kudu/tablet/tablet.cc
@@ -255,7 +255,6 @@ Tablet::Tablet(scoped_refptr<TabletMetadata> metadata,
     METRIC_last_write_elapsed_time.InstantiateFunctionGauge(
       metric_entity_, Bind(&Tablet::LastWriteElapsedSeconds, Unretained(this)), MergeType::kMin)
       ->AutoDetach(&metric_detacher_);
-    METRIC_merged_entities_count_of_tablet.InstantiateHidden(metric_entity_, 1);
   }
 
   if (FLAGS_tablet_throttler_rpc_per_sec > 0 || FLAGS_tablet_throttler_bytes_per_sec > 0) {
diff --git a/src/kudu/tablet/tablet_metrics.cc b/src/kudu/tablet/tablet_metrics.cc
index e7bf936..c3d05fe 100644
--- a/src/kudu/tablet/tablet_metrics.cc
+++ b/src/kudu/tablet/tablet_metrics.cc
@@ -312,6 +312,7 @@ METRIC_DEFINE_gauge_double(tablet, average_diskrowset_height, "Average DiskRowSe
                            "replica. The larger the average height, the more "
                            "uncompacted the tablet replica is.",
                            kudu::MetricLevel::kInfo);
+METRIC_DECLARE_gauge_size(merged_entities_count_of_tablet);
 
 namespace kudu {
 namespace tablet {
@@ -319,6 +320,7 @@ namespace tablet {
 #define MINIT(x) x(METRIC_##x.Instantiate(entity))
 #define GINIT(x) x(METRIC_##x.Instantiate(entity, 0))
 #define MEANINIT(x) x(METRIC_##x.InstantiateMeanGauge(entity))
+#define HIDEINIT(x, v) x(METRIC_##x.InstantiateHidden(entity, v))
 TabletMetrics::TabletMetrics(const scoped_refptr<MetricEntity>& entity)
   : MINIT(rows_inserted),
     MINIT(rows_upserted),
@@ -363,11 +365,13 @@ TabletMetrics::TabletMetrics(const scoped_refptr<MetricEntity>& entity)
     MINIT(undo_delta_block_gc_delete_duration),
     MINIT(undo_delta_block_gc_perform_duration),
     MINIT(leader_memory_pressure_rejections),
-    MEANINIT(average_diskrowset_height) {
+    MEANINIT(average_diskrowset_height),
+    HIDEINIT(merged_entities_count_of_tablet, 1) {
 }
 #undef MINIT
 #undef GINIT
 #undef MEANINIT
+#undef HIDEINIT
 
 void TabletMetrics::AddProbeStats(const ProbeStats* stats_array, int len,
                                   Arena* work_arena) {
diff --git a/src/kudu/tablet/tablet_metrics.h b/src/kudu/tablet/tablet_metrics.h
index 1295b69..febc959 100644
--- a/src/kudu/tablet/tablet_metrics.h
+++ b/src/kudu/tablet/tablet_metrics.h
@@ -101,6 +101,9 @@ struct TabletMetrics {
 
   // Compaction metrics.
   scoped_refptr<MeanGauge> average_diskrowset_height;
+
+  // Static metrics.
+  scoped_refptr<AtomicGauge<uint64_t>> merged_entities_count_of_tablet;
 };
 
 } // namespace tablet
diff --git a/src/kudu/util/metrics.h b/src/kudu/util/metrics.h
index 20f8455..899e2a7 100644
--- a/src/kudu/util/metrics.h
+++ b/src/kudu/util/metrics.h
@@ -1103,14 +1103,15 @@ class AtomicGauge : public Gauge {
   T value() const {
     return static_cast<T>(value_.Load(kMemOrderRelease));
   }
-  virtual void set_value(const T& value) {
+  void set_value(const T& value) {
+    UpdateModificationEpoch();
     value_.Store(static_cast<int64_t>(value), kMemOrderNoBarrier);
   }
   void Increment() {
     UpdateModificationEpoch();
     value_.IncrementBy(1, kMemOrderNoBarrier);
   }
-  virtual void IncrementBy(int64_t amount) {
+  void IncrementBy(int64_t amount) {
     UpdateModificationEpoch();
     value_.IncrementBy(amount, kMemOrderNoBarrier);
   }
@@ -1289,6 +1290,10 @@ class FunctionGauge : public Gauge {
       return;
     }
 
+    if (InvalidateIfNeededInMerge(other)) {
+      return;
+    }
+
     // It's not needed to check whether a FunctionGauge is InvalidateIfNeededInMerge
     // or not, because it's always 'touched' after constructing.
     auto other_value = down_cast<FunctionGauge<T>*>(other.get())->value();