You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by la...@apache.org on 2022/03/21 04:08:05 UTC

[kudu] branch master updated (91d3101 -> 8409d28)

This is an automated email from the ASF dual-hosted git repository.

laiyingchun pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git.


    from 91d3101  [tools] run intra-location rebalancing in parallel
     new 9a53fec  [tools] KUDU-3333 Include Table Counts in kudu hms Dryrun
     new 8409d28  [metrics] Add table level metrics column_count and schema_version on master

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 src/kudu/master/catalog_manager.cc | 25 +++++++++++++++---
 src/kudu/master/catalog_manager.h  | 10 +++++---
 src/kudu/master/master-test.cc     | 52 ++++++++++++++++++++++++++++++++++++--
 src/kudu/master/table_metrics.cc   | 10 ++++++++
 src/kudu/master/table_metrics.h    |  2 ++
 src/kudu/tools/tool_action_hms.cc  | 25 +++++++++++++++---
 6 files changed, 111 insertions(+), 13 deletions(-)

[kudu] 02/02: [metrics] Add table level metrics column_count and schema_version on master

Posted by la...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

laiyingchun pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 8409d28a5ace591f635b029cb37ebd70f3b8eb10
Author: Yingchun Lai <40...@qq.com>
AuthorDate: Mon Feb 21 18:08:30 2022 +0800

    [metrics] Add table level metrics column_count and schema_version on master
    
    This patch adds some table level metrics on master.
    - column_count: The column count in the table's latest schema. Since too
      many column count will lead to bad performance and space
      amplification, we can add some monitor alerts based on this
      metric after this.
    - schema_version: The table's schema version. Too many or too
      frequently schema changes is also a metric worth to be monitored.
    
    Change-Id: Ia0671cfb0758d53e950a4e35a968dae15f82d18e
    Reviewed-on: http://gerrit.cloudera.org:8080/18258
    Tested-by: Kudu Jenkins
    Reviewed-by: Abhishek Chennaka <ac...@cloudera.com>
    Reviewed-by: Yifan Zhang <ch...@163.com>
    Reviewed-by: Yingchun Lai <ac...@gmail.com>
---
 src/kudu/master/catalog_manager.cc | 25 +++++++++++++++---
 src/kudu/master/catalog_manager.h  | 10 +++++---
 src/kudu/master/master-test.cc     | 52 ++++++++++++++++++++++++++++++++++++--
 src/kudu/master/table_metrics.cc   | 10 ++++++++
 src/kudu/master/table_metrics.h    |  2 ++
 5 files changed, 90 insertions(+), 9 deletions(-)

diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc
index 3f5333f..1b7aa22 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -590,6 +590,10 @@ class TableLoader : public TableVisitor {
       // It's unnecessary to register metrics for the deleted tables.
       table->RegisterMetrics(catalog_manager_->master_->metric_registry(),
           CatalogManager::NormalizeTableName(metadata.name()));
+
+      // Update table's schema related metrics after being loaded.
+      table->UpdateSchemaMetrics();
+
       LOG(INFO) << Substitute("Loaded metadata for table $0", table->ToString());
     }
     VLOG(2) << Substitute("Metadata for table $0: $1",
@@ -2103,6 +2107,9 @@ Status CatalogManager::CreateTable(const CreateTableRequestPB* orig_req,
   }
   TRACE("Inserted table and tablets into CatalogManager maps");
 
+  // Update table's schema related metrics after being created.
+  table->UpdateSchemaMetrics();
+
   resp->set_table_id(table->id());
   VLOG(1) << "Created table " << table->ToString();
   background_tasks_->Wake();
@@ -3383,6 +3390,9 @@ Status CatalogManager::AlterTable(const AlterTableRequestPB& req,
     table_locations_cache_->Remove(table->id());
   }
 
+  // Update table's schema related metrics after being altered.
+  table->UpdateSchemaMetrics();
+
   background_tasks_->Wake();
   return Status::OK();
 }
@@ -4981,7 +4991,7 @@ Status CatalogManager::ProcessTabletReport(
       if (report.has_stats()) {
         // For the versions >= 1.11.x, the tserver reports stats. But keep in
         // mind that 'live_row_count' is not supported for the legacy replicas.
-        tablet->table()->UpdateMetrics(tablet_id, tablet->GetStats(), report.stats());
+        tablet->table()->UpdateStatsMetrics(tablet_id, tablet->GetStats(), report.stats());
         tablet->UpdateStats(report.stats());
       } else {
         // For the versions < 1.11.x, the tserver doesn't report stats. Thus,
@@ -6739,9 +6749,9 @@ void TableInfo::UnregisterMetrics() {
   }
 }
 
-void TableInfo::UpdateMetrics(const string& tablet_id,
-                              const tablet::ReportedTabletStatsPB& old_stats,
-                              const tablet::ReportedTabletStatsPB& new_stats) {
+void TableInfo::UpdateStatsMetrics(const string& tablet_id,
+                                   const tablet::ReportedTabletStatsPB& old_stats,
+                                   const tablet::ReportedTabletStatsPB& new_stats) {
   if (!metrics_) {
     return;
   }
@@ -6808,6 +6818,13 @@ void TableInfo::UpdateMetrics(const string& tablet_id,
   }
 }
 
+void TableInfo::UpdateSchemaMetrics() {
+  TableMetadataLock l(this, LockMode::READ);
+  const SysTablesEntryPB& pb = metadata().state().pb;
+  metrics_->column_count->set_value(pb.schema().columns().size());
+  metrics_->schema_version->set_value(pb.version());
+}
+
 void TableInfo::InvalidateMetrics(const std::string& tablet_id) {
   if (!metrics_) return;
   if (!metrics_->ContainsTabletNoOnDiskSize(tablet_id)) {
diff --git a/src/kudu/master/catalog_manager.h b/src/kudu/master/catalog_manager.h
index 0379ec1..b23b5b7 100644
--- a/src/kudu/master/catalog_manager.h
+++ b/src/kudu/master/catalog_manager.h
@@ -360,9 +360,13 @@ class TableInfo : public RefCountedThreadSafe<TableInfo> {
   void UnregisterMetrics();
 
   // Update stats belonging to 'tablet_id' in the table's metrics.
-  void UpdateMetrics(const std::string& tablet_id,
-                     const tablet::ReportedTabletStatsPB& old_stats,
-                     const tablet::ReportedTabletStatsPB& new_stats);
+  void UpdateStatsMetrics(const std::string& tablet_id,
+                          const tablet::ReportedTabletStatsPB& old_stats,
+                          const tablet::ReportedTabletStatsPB& new_stats);
+
+  // Update table's schema related metrics, for exapmle, schema version,
+  // column count, and etc.
+  void UpdateSchemaMetrics();
 
   // Invalidate stats belonging to 'tablet_id' in the table's metrics.
   void InvalidateMetrics(const std::string& tablet_id);
diff --git a/src/kudu/master/master-test.cc b/src/kudu/master/master-test.cc
index ddf6adc..db34547 100644
--- a/src/kudu/master/master-test.cc
+++ b/src/kudu/master/master-test.cc
@@ -2491,7 +2491,7 @@ TEST_F(MasterTest, TestHideLiveRowCountInTableMetrics) {
       old_stats.set_live_row_count(1);
       new_stats.set_live_row_count(1);
     }
-    tables[0]->UpdateMetrics(tablet->id(), old_stats, new_stats);
+    tables[0]->UpdateStatsMetrics(tablet->id(), old_stats, new_stats);
   };
 
   // Trigger to cause 'live_row_count' invisible.
@@ -2560,7 +2560,7 @@ TEST_F(MasterTest, TestTableStatisticsWithOldVersion) {
     tablet::ReportedTabletStatsPB new_stats;
     new_stats.set_on_disk_size(1024);
     new_stats.set_live_row_count(1000);
-    table->UpdateMetrics(tablets.back()->id(), old_stats, new_stats);
+    table->UpdateStatsMetrics(tablets.back()->id(), old_stats, new_stats);
     ASSERT_TRUE(table->GetMetrics()->TableSupportsOnDiskSize());
     ASSERT_TRUE(table->GetMetrics()->TableSupportsLiveRowCount());
     ASSERT_EQ(1024, table->GetMetrics()->on_disk_size->value());
@@ -2627,6 +2627,54 @@ TEST_F(MasterTest, TestDeletedTablesAndTabletsCleanup) {
   });
 }
 
+TEST_F(MasterTest, TestTableSchemaMetrics) {
+  const char* kTableName = "testtable";
+  const Schema kTableSchema({ ColumnSchema("key", INT32) }, 1);
+  ASSERT_OK(CreateTable(kTableName, kTableSchema));
+
+  vector<scoped_refptr<TableInfo>> tables;
+  {
+    CatalogManager::ScopedLeaderSharedLock l(master_->catalog_manager());
+    master_->catalog_manager()->GetAllTables(&tables);
+    ASSERT_EQ(1, tables.size());
+  }
+  const auto& table = tables[0];
+
+  ASSERT_EQ(1, table->GetMetrics()->column_count->value());
+  ASSERT_EQ(0, table->GetMetrics()->schema_version->value());
+
+  {
+    AlterTableRequestPB req;
+    req.mutable_table()->set_table_name(kTableName);
+    AlterTableRequestPB::Step* step = req.add_alter_schema_steps();
+    step->set_type(AlterTableRequestPB::ADD_COLUMN);
+    ColumnSchemaToPB(ColumnSchema("c1", INT32, true),
+                     step->mutable_add_column()->mutable_schema());
+
+    AlterTableResponsePB resp;
+    RpcController controller;
+    ASSERT_OK(proxy_->AlterTable(req, &resp, &controller));
+    ASSERT_FALSE(resp.has_error());
+    ASSERT_EQ(2, table->GetMetrics()->column_count->value());
+    ASSERT_EQ(1, table->GetMetrics()->schema_version->value());
+  }
+
+  {
+    AlterTableRequestPB req;
+    req.mutable_table()->set_table_name(kTableName);
+    AlterTableRequestPB::Step* step = req.add_alter_schema_steps();
+    step->set_type(AlterTableRequestPB::DROP_COLUMN);
+    step->mutable_drop_column()->set_name("c1");
+
+    AlterTableResponsePB resp;
+    RpcController controller;
+    ASSERT_OK(proxy_->AlterTable(req, &resp, &controller));
+    ASSERT_FALSE(resp.has_error());
+    ASSERT_EQ(1, table->GetMetrics()->column_count->value());
+    ASSERT_EQ(2, table->GetMetrics()->schema_version->value());
+  }
+}
+
 class AuthzTokenMasterTest : public MasterTest,
                              public ::testing::WithParamInterface<bool> {};
 
diff --git a/src/kudu/master/table_metrics.cc b/src/kudu/master/table_metrics.cc
index 8b85ca0..5fdc6d3 100644
--- a/src/kudu/master/table_metrics.cc
+++ b/src/kudu/master/table_metrics.cc
@@ -36,12 +36,22 @@ METRIC_DEFINE_gauge_uint64(table, live_row_count, "Table Live Row count",
     "Pre-replication aggregated number of live rows in this table. "
     "Only accurate if all tablets in the table support live row counting.",
     kudu::MetricLevel::kInfo);
+METRIC_DEFINE_gauge_uint32(table, column_count, "Table Column count",
+    kudu::MetricUnit::kUnits,
+    "The column count in the table's latest schema.",
+    kudu::MetricLevel::kInfo);
+METRIC_DEFINE_gauge_uint32(table, schema_version, "Table Schema Version",
+    kudu::MetricUnit::kUnits,
+    "The table's schema version.",
+    kudu::MetricLevel::kInfo);
 
 #define GINIT(x) x(METRIC_##x.Instantiate(entity, 0))
 #define HIDEINIT(x, v) x(METRIC_##x.InstantiateHidden(entity, v))
 TableMetrics::TableMetrics(const scoped_refptr<MetricEntity>& entity)
   : GINIT(on_disk_size),
     GINIT(live_row_count),
+    GINIT(column_count),
+    GINIT(schema_version),
     HIDEINIT(merged_entities_count_of_table, 1) {
 }
 #undef GINIT
diff --git a/src/kudu/master/table_metrics.h b/src/kudu/master/table_metrics.h
index 1cf731c..2b851ba 100644
--- a/src/kudu/master/table_metrics.h
+++ b/src/kudu/master/table_metrics.h
@@ -49,6 +49,8 @@ struct TableMetrics {
 
   scoped_refptr<AtomicGauge<uint64_t>> on_disk_size;
   scoped_refptr<AtomicGauge<uint64_t>> live_row_count;
+  scoped_refptr<AtomicGauge<uint32_t>> column_count;
+  scoped_refptr<AtomicGauge<uint32_t>> schema_version;
   scoped_refptr<AtomicGauge<size_t>> merged_entities_count_of_table;
 
   void AddTabletNoOnDiskSize(const std::string& tablet_id);

[kudu] 01/02: [tools] KUDU-3333 Include Table Counts in kudu hms Dryrun

Posted by la...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

laiyingchun pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 9a53fec14b9aaa811732f2b7a87da36d893203da
Author: Abhishek Chennaka <ac...@cloudera.com>
AuthorDate: Sun Feb 27 23:23:07 2022 -0500

    [tools] KUDU-3333 Include Table Counts in kudu hms Dryrun
    
    In cases where the user running the Kudu CLI tool, kudu hms
    fix, doesn't have permissions from Ranger/Sentry to access
    the tables, these tables would be treated as non-existant tables
    in Kudu. In such scenarios, there might be situations where the
    tables could be dropped from HMS inspite of them being present
    in Kudu when run with -drop_orphan_tables flag.
    
    This patch adds additional logging which reports the total
    table counts from HMS and Kudu master catalogs and warns the
    user if there are no tables in Kudu when kudu hms fix command is
    run.
    
    Sample runs of the tool before and after the change:
    In case of an empty cluster no output is seen without the code
    change. After the code change we see the below:
    $ ./kudu hms fix `hostname -f`
    I0315 16:16:36.039008 351197 tool_action_hms.cc:867] Number of Kudu tables found in Kudu master catalog: 0
    I0315 16:16:36.039080 351197 tool_action_hms.cc:868] Number of Kudu tables found in HMS catalog: 0
    $ ./kudu hms fix --dryrun `hostname -f`
    I0315 16:16:55.158463 351291 tool_action_hms.cc:642] NOTE: There are zero kudu tables listed. If the cluster indeed has kudu tables please re-run the command with right credentials.
    I0315 16:16:55.158546 351291 tool_action_hms.cc:867] Number of Kudu tables found in Kudu master catalog: 0
    I0315 16:16:55.158555 351291 tool_action_hms.cc:868] Number of Kudu tables found in HMS catalog: 0
    
    In case of a non-empty cluster without the change:
    $ kudu hms fix --dryrun `hostname -f` --ignore_other_clusters=false
    I0315 16:57:55.329049 365038 tool_action_hms.cc:757] [dryrun] Refreshing HMS table metadata for Kudu table default.my_first_table [id=408e5696e51c462c86a6d9a84bb95583]
    Non-empty cluster after the change:
    $ ./kudu hms fix --dryrun `hostname -f`
    I0315 16:19:20.885208 352393 tool_action_hms.cc:822] [dryrun] Changing owner of default.my_first_table [id=408e5696e51c462c86a6d9a84bb95583] to admin in Kudu catalog.
    I0315 16:19:20.885274 352393 tool_action_hms.cc:853] [dryrun] Refreshing HMS table metadata for Kudu table default.my_first_table [id=408e5696e51c462c86a6d9a84bb95583]
    I0315 16:19:20.885285 352393 tool_action_hms.cc:867] Number of Kudu tables found in Kudu master catalog: 1
    I0315 16:19:20.885325 352393 tool_action_hms.cc:868] Number of Kudu tables found in HMS catalog: 1
    
    Change-Id: Idf26141d2a3fd6cbb7249b3492fc6a50a0c0aa2d
    Reviewed-on: http://gerrit.cloudera.org:8080/18280
    Tested-by: Kudu Jenkins
    Reviewed-by: Andrew Wong <aw...@cloudera.com>
---
 src/kudu/tools/tool_action_hms.cc | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/src/kudu/tools/tool_action_hms.cc b/src/kudu/tools/tool_action_hms.cc
index b554967..29704cd 100644
--- a/src/kudu/tools/tool_action_hms.cc
+++ b/src/kudu/tools/tool_action_hms.cc
@@ -376,7 +376,9 @@ struct CatalogReport {
 Status AnalyzeCatalogs(const string& master_addrs,
                        HmsCatalog* hms_catalog,
                        KuduClient* kudu_client,
-                       CatalogReport* report) {
+                       CatalogReport* report,
+                       int* kudu_catalog_count = nullptr,
+                       int* hms_catalog_count = nullptr) {
   // Step 1: retrieve all Kudu tables, and aggregate them by ID and by name. The
   // by-ID map will be used to match the HMS Kudu table entries. The by-name map
   // will be used to match against legacy Impala/Kudu HMS table entries.
@@ -385,6 +387,9 @@ Status AnalyzeCatalogs(const string& master_addrs,
   {
     vector<string> kudu_table_names;
     RETURN_NOT_OK(kudu_client->ListTables(&kudu_table_names));
+    if (kudu_catalog_count) {
+      *kudu_catalog_count = kudu_table_names.size();
+    }
     for (const string& kudu_table_name : kudu_table_names) {
       shared_ptr<KuduTable> kudu_table;
       // TODO(dan): When the error is NotFound, prepend an admonishment about not
@@ -405,6 +410,9 @@ Status AnalyzeCatalogs(const string& master_addrs,
   {
     vector<hive::Table> hms_tables;
     RETURN_NOT_OK(hms_catalog->GetKuduTables(&hms_tables));
+    if (hms_catalog_count) {
+      *hms_catalog_count = hms_tables.size();
+    }
     for (hive::Table& hms_table : hms_tables) {
       // If the addresses in the HMS entry don't overlap at all with the
       // expected addresses, the entry is likely from another Kudu cluster.
@@ -626,8 +634,14 @@ Status FixHmsMetadata(const RunnerContext& context) {
   RETURN_NOT_OK(Init(context, &kudu_client, &hms_catalog, &master_addrs));
 
   CatalogReport report;
-  RETURN_NOT_OK(AnalyzeCatalogs(master_addrs, hms_catalog.get(), kudu_client.get(), &report));
-
+  int kudu_catalog_count = 0;
+  int hms_catalog_count = 0;
+  RETURN_NOT_OK(AnalyzeCatalogs(master_addrs, hms_catalog.get(), kudu_client.get(), &report,
+                                &kudu_catalog_count, &hms_catalog_count));
+  if (FLAGS_dryrun && kudu_catalog_count == 0) {
+    LOG(INFO) << "NOTE: There are zero kudu tables listed. If the cluster indeed has kudu tables "
+                 "please re-run the command with right credentials." << endl;
+  }
   bool success = true;
 
   if (FLAGS_drop_orphan_hms_tables) {
@@ -850,7 +864,10 @@ Status FixHmsMetadata(const RunnerContext& context) {
       }
     }
   }
-
+  LOG(INFO) << Substitute("Number of Kudu tables found in Kudu master catalog: $0",
+                          kudu_catalog_count) << endl;
+  LOG(INFO) << Substitute("Number of Kudu tables found in HMS catalog: $0", hms_catalog_count)
+            << endl;
   if (FLAGS_dryrun || success) {
     return Status::OK();
   }