You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by la...@apache.org on 2022/03/21 04:08:07 UTC

[kudu] 02/02: [metrics] Add table level metrics column_count and schema_version on master

This is an automated email from the ASF dual-hosted git repository.

laiyingchun pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 8409d28a5ace591f635b029cb37ebd70f3b8eb10
Author: Yingchun Lai <40...@qq.com>
AuthorDate: Mon Feb 21 18:08:30 2022 +0800

    [metrics] Add table level metrics column_count and schema_version on master
    
    This patch adds some table level metrics on master.
    - column_count: The column count in the table's latest schema. Since too
      many column count will lead to bad performance and space
      amplification, we can add some monitor alerts based on this
      metric after this.
    - schema_version: The table's schema version. Too many or too
      frequently schema changes is also a metric worth to be monitored.
    
    Change-Id: Ia0671cfb0758d53e950a4e35a968dae15f82d18e
    Reviewed-on: http://gerrit.cloudera.org:8080/18258
    Tested-by: Kudu Jenkins
    Reviewed-by: Abhishek Chennaka <ac...@cloudera.com>
    Reviewed-by: Yifan Zhang <ch...@163.com>
    Reviewed-by: Yingchun Lai <ac...@gmail.com>
---
 src/kudu/master/catalog_manager.cc | 25 +++++++++++++++---
 src/kudu/master/catalog_manager.h  | 10 +++++---
 src/kudu/master/master-test.cc     | 52 ++++++++++++++++++++++++++++++++++++--
 src/kudu/master/table_metrics.cc   | 10 ++++++++
 src/kudu/master/table_metrics.h    |  2 ++
 5 files changed, 90 insertions(+), 9 deletions(-)

diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc
index 3f5333f..1b7aa22 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -590,6 +590,10 @@ class TableLoader : public TableVisitor {
       // It's unnecessary to register metrics for the deleted tables.
       table->RegisterMetrics(catalog_manager_->master_->metric_registry(),
           CatalogManager::NormalizeTableName(metadata.name()));
+
+      // Update table's schema related metrics after being loaded.
+      table->UpdateSchemaMetrics();
+
       LOG(INFO) << Substitute("Loaded metadata for table $0", table->ToString());
     }
     VLOG(2) << Substitute("Metadata for table $0: $1",
@@ -2103,6 +2107,9 @@ Status CatalogManager::CreateTable(const CreateTableRequestPB* orig_req,
   }
   TRACE("Inserted table and tablets into CatalogManager maps");
 
+  // Update table's schema related metrics after being created.
+  table->UpdateSchemaMetrics();
+
   resp->set_table_id(table->id());
   VLOG(1) << "Created table " << table->ToString();
   background_tasks_->Wake();
@@ -3383,6 +3390,9 @@ Status CatalogManager::AlterTable(const AlterTableRequestPB& req,
     table_locations_cache_->Remove(table->id());
   }
 
+  // Update table's schema related metrics after being altered.
+  table->UpdateSchemaMetrics();
+
   background_tasks_->Wake();
   return Status::OK();
 }
@@ -4981,7 +4991,7 @@ Status CatalogManager::ProcessTabletReport(
       if (report.has_stats()) {
         // For the versions >= 1.11.x, the tserver reports stats. But keep in
         // mind that 'live_row_count' is not supported for the legacy replicas.
-        tablet->table()->UpdateMetrics(tablet_id, tablet->GetStats(), report.stats());
+        tablet->table()->UpdateStatsMetrics(tablet_id, tablet->GetStats(), report.stats());
         tablet->UpdateStats(report.stats());
       } else {
         // For the versions < 1.11.x, the tserver doesn't report stats. Thus,
@@ -6739,9 +6749,9 @@ void TableInfo::UnregisterMetrics() {
   }
 }
 
-void TableInfo::UpdateMetrics(const string& tablet_id,
-                              const tablet::ReportedTabletStatsPB& old_stats,
-                              const tablet::ReportedTabletStatsPB& new_stats) {
+void TableInfo::UpdateStatsMetrics(const string& tablet_id,
+                                   const tablet::ReportedTabletStatsPB& old_stats,
+                                   const tablet::ReportedTabletStatsPB& new_stats) {
   if (!metrics_) {
     return;
   }
@@ -6808,6 +6818,13 @@ void TableInfo::UpdateMetrics(const string& tablet_id,
   }
 }
 
+void TableInfo::UpdateSchemaMetrics() {
+  TableMetadataLock l(this, LockMode::READ);
+  const SysTablesEntryPB& pb = metadata().state().pb;
+  metrics_->column_count->set_value(pb.schema().columns().size());
+  metrics_->schema_version->set_value(pb.version());
+}
+
 void TableInfo::InvalidateMetrics(const std::string& tablet_id) {
   if (!metrics_) return;
   if (!metrics_->ContainsTabletNoOnDiskSize(tablet_id)) {
diff --git a/src/kudu/master/catalog_manager.h b/src/kudu/master/catalog_manager.h
index 0379ec1..b23b5b7 100644
--- a/src/kudu/master/catalog_manager.h
+++ b/src/kudu/master/catalog_manager.h
@@ -360,9 +360,13 @@ class TableInfo : public RefCountedThreadSafe<TableInfo> {
   void UnregisterMetrics();
 
   // Update stats belonging to 'tablet_id' in the table's metrics.
-  void UpdateMetrics(const std::string& tablet_id,
-                     const tablet::ReportedTabletStatsPB& old_stats,
-                     const tablet::ReportedTabletStatsPB& new_stats);
+  void UpdateStatsMetrics(const std::string& tablet_id,
+                          const tablet::ReportedTabletStatsPB& old_stats,
+                          const tablet::ReportedTabletStatsPB& new_stats);
+
+  // Update table's schema related metrics, for exapmle, schema version,
+  // column count, and etc.
+  void UpdateSchemaMetrics();
 
   // Invalidate stats belonging to 'tablet_id' in the table's metrics.
   void InvalidateMetrics(const std::string& tablet_id);
diff --git a/src/kudu/master/master-test.cc b/src/kudu/master/master-test.cc
index ddf6adc..db34547 100644
--- a/src/kudu/master/master-test.cc
+++ b/src/kudu/master/master-test.cc
@@ -2491,7 +2491,7 @@ TEST_F(MasterTest, TestHideLiveRowCountInTableMetrics) {
       old_stats.set_live_row_count(1);
       new_stats.set_live_row_count(1);
     }
-    tables[0]->UpdateMetrics(tablet->id(), old_stats, new_stats);
+    tables[0]->UpdateStatsMetrics(tablet->id(), old_stats, new_stats);
   };
 
   // Trigger to cause 'live_row_count' invisible.
@@ -2560,7 +2560,7 @@ TEST_F(MasterTest, TestTableStatisticsWithOldVersion) {
     tablet::ReportedTabletStatsPB new_stats;
     new_stats.set_on_disk_size(1024);
     new_stats.set_live_row_count(1000);
-    table->UpdateMetrics(tablets.back()->id(), old_stats, new_stats);
+    table->UpdateStatsMetrics(tablets.back()->id(), old_stats, new_stats);
     ASSERT_TRUE(table->GetMetrics()->TableSupportsOnDiskSize());
     ASSERT_TRUE(table->GetMetrics()->TableSupportsLiveRowCount());
     ASSERT_EQ(1024, table->GetMetrics()->on_disk_size->value());
@@ -2627,6 +2627,54 @@ TEST_F(MasterTest, TestDeletedTablesAndTabletsCleanup) {
   });
 }
 
+TEST_F(MasterTest, TestTableSchemaMetrics) {
+  const char* kTableName = "testtable";
+  const Schema kTableSchema({ ColumnSchema("key", INT32) }, 1);
+  ASSERT_OK(CreateTable(kTableName, kTableSchema));
+
+  vector<scoped_refptr<TableInfo>> tables;
+  {
+    CatalogManager::ScopedLeaderSharedLock l(master_->catalog_manager());
+    master_->catalog_manager()->GetAllTables(&tables);
+    ASSERT_EQ(1, tables.size());
+  }
+  const auto& table = tables[0];
+
+  ASSERT_EQ(1, table->GetMetrics()->column_count->value());
+  ASSERT_EQ(0, table->GetMetrics()->schema_version->value());
+
+  {
+    AlterTableRequestPB req;
+    req.mutable_table()->set_table_name(kTableName);
+    AlterTableRequestPB::Step* step = req.add_alter_schema_steps();
+    step->set_type(AlterTableRequestPB::ADD_COLUMN);
+    ColumnSchemaToPB(ColumnSchema("c1", INT32, true),
+                     step->mutable_add_column()->mutable_schema());
+
+    AlterTableResponsePB resp;
+    RpcController controller;
+    ASSERT_OK(proxy_->AlterTable(req, &resp, &controller));
+    ASSERT_FALSE(resp.has_error());
+    ASSERT_EQ(2, table->GetMetrics()->column_count->value());
+    ASSERT_EQ(1, table->GetMetrics()->schema_version->value());
+  }
+
+  {
+    AlterTableRequestPB req;
+    req.mutable_table()->set_table_name(kTableName);
+    AlterTableRequestPB::Step* step = req.add_alter_schema_steps();
+    step->set_type(AlterTableRequestPB::DROP_COLUMN);
+    step->mutable_drop_column()->set_name("c1");
+
+    AlterTableResponsePB resp;
+    RpcController controller;
+    ASSERT_OK(proxy_->AlterTable(req, &resp, &controller));
+    ASSERT_FALSE(resp.has_error());
+    ASSERT_EQ(1, table->GetMetrics()->column_count->value());
+    ASSERT_EQ(2, table->GetMetrics()->schema_version->value());
+  }
+}
+
 class AuthzTokenMasterTest : public MasterTest,
                              public ::testing::WithParamInterface<bool> {};
 
diff --git a/src/kudu/master/table_metrics.cc b/src/kudu/master/table_metrics.cc
index 8b85ca0..5fdc6d3 100644
--- a/src/kudu/master/table_metrics.cc
+++ b/src/kudu/master/table_metrics.cc
@@ -36,12 +36,22 @@ METRIC_DEFINE_gauge_uint64(table, live_row_count, "Table Live Row count",
     "Pre-replication aggregated number of live rows in this table. "
     "Only accurate if all tablets in the table support live row counting.",
     kudu::MetricLevel::kInfo);
+METRIC_DEFINE_gauge_uint32(table, column_count, "Table Column count",
+    kudu::MetricUnit::kUnits,
+    "The column count in the table's latest schema.",
+    kudu::MetricLevel::kInfo);
+METRIC_DEFINE_gauge_uint32(table, schema_version, "Table Schema Version",
+    kudu::MetricUnit::kUnits,
+    "The table's schema version.",
+    kudu::MetricLevel::kInfo);
 
 #define GINIT(x) x(METRIC_##x.Instantiate(entity, 0))
 #define HIDEINIT(x, v) x(METRIC_##x.InstantiateHidden(entity, v))
 TableMetrics::TableMetrics(const scoped_refptr<MetricEntity>& entity)
   : GINIT(on_disk_size),
     GINIT(live_row_count),
+    GINIT(column_count),
+    GINIT(schema_version),
     HIDEINIT(merged_entities_count_of_table, 1) {
 }
 #undef GINIT
diff --git a/src/kudu/master/table_metrics.h b/src/kudu/master/table_metrics.h
index 1cf731c..2b851ba 100644
--- a/src/kudu/master/table_metrics.h
+++ b/src/kudu/master/table_metrics.h
@@ -49,6 +49,8 @@ struct TableMetrics {
 
   scoped_refptr<AtomicGauge<uint64_t>> on_disk_size;
   scoped_refptr<AtomicGauge<uint64_t>> live_row_count;
+  scoped_refptr<AtomicGauge<uint32_t>> column_count;
+  scoped_refptr<AtomicGauge<uint32_t>> schema_version;
   scoped_refptr<AtomicGauge<size_t>> merged_entities_count_of_table;
 
   void AddTabletNoOnDiskSize(const std::string& tablet_id);