You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by la...@apache.org on 2022/03/21 04:08:06 UTC

[kudu] 01/02: [tools] KUDU-3333 Include Table Counts in kudu hms Dryrun

This is an automated email from the ASF dual-hosted git repository.

laiyingchun pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 9a53fec14b9aaa811732f2b7a87da36d893203da
Author: Abhishek Chennaka <ac...@cloudera.com>
AuthorDate: Sun Feb 27 23:23:07 2022 -0500

    [tools] KUDU-3333 Include Table Counts in kudu hms Dryrun
    
    When the user running the Kudu CLI tool `kudu hms fix` doesn't
    have permissions from Ranger/Sentry to access some tables, those
    tables are treated as nonexistent in Kudu. In such scenarios,
    the tables could be dropped from HMS in spite of being present
    in Kudu when the tool is run with the --drop_orphan_hms_tables
    flag.
    
    This patch adds logging that reports the total table counts
    from the HMS and Kudu master catalogs whenever the `kudu hms fix`
    command is run, and, when it is run with --dryrun, warns the
    user if no tables are listed in Kudu.
    
    Sample runs of the tool before and after the change:
    In case of an empty cluster, no output is seen without the code
    change. After the code change, we see the following:
    $ ./kudu hms fix `hostname -f`
    I0315 16:16:36.039008 351197 tool_action_hms.cc:867] Number of Kudu tables found in Kudu master catalog: 0
    I0315 16:16:36.039080 351197 tool_action_hms.cc:868] Number of Kudu tables found in HMS catalog: 0
    $ ./kudu hms fix --dryrun `hostname -f`
    I0315 16:16:55.158463 351291 tool_action_hms.cc:642] NOTE: There are zero kudu tables listed. If the cluster indeed has kudu tables please re-run the command with right credentials.
    I0315 16:16:55.158546 351291 tool_action_hms.cc:867] Number of Kudu tables found in Kudu master catalog: 0
    I0315 16:16:55.158555 351291 tool_action_hms.cc:868] Number of Kudu tables found in HMS catalog: 0
    
    In case of a non-empty cluster without the change:
    $ kudu hms fix --dryrun `hostname -f` --ignore_other_clusters=false
    I0315 16:57:55.329049 365038 tool_action_hms.cc:757] [dryrun] Refreshing HMS table metadata for Kudu table default.my_first_table [id=408e5696e51c462c86a6d9a84bb95583]
    Non-empty cluster after the change:
    $ ./kudu hms fix --dryrun `hostname -f`
    I0315 16:19:20.885208 352393 tool_action_hms.cc:822] [dryrun] Changing owner of default.my_first_table [id=408e5696e51c462c86a6d9a84bb95583] to admin in Kudu catalog.
    I0315 16:19:20.885274 352393 tool_action_hms.cc:853] [dryrun] Refreshing HMS table metadata for Kudu table default.my_first_table [id=408e5696e51c462c86a6d9a84bb95583]
    I0315 16:19:20.885285 352393 tool_action_hms.cc:867] Number of Kudu tables found in Kudu master catalog: 1
    I0315 16:19:20.885325 352393 tool_action_hms.cc:868] Number of Kudu tables found in HMS catalog: 1
    
    Change-Id: Idf26141d2a3fd6cbb7249b3492fc6a50a0c0aa2d
    Reviewed-on: http://gerrit.cloudera.org:8080/18280
    Tested-by: Kudu Jenkins
    Reviewed-by: Andrew Wong <aw...@cloudera.com>
---
 src/kudu/tools/tool_action_hms.cc | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/src/kudu/tools/tool_action_hms.cc b/src/kudu/tools/tool_action_hms.cc
index b554967..29704cd 100644
--- a/src/kudu/tools/tool_action_hms.cc
+++ b/src/kudu/tools/tool_action_hms.cc
@@ -376,7 +376,9 @@ struct CatalogReport {
 Status AnalyzeCatalogs(const string& master_addrs,
                        HmsCatalog* hms_catalog,
                        KuduClient* kudu_client,
-                       CatalogReport* report) {
+                       CatalogReport* report,
+                       int* kudu_catalog_count = nullptr,
+                       int* hms_catalog_count = nullptr) {
   // Step 1: retrieve all Kudu tables, and aggregate them by ID and by name. The
   // by-ID map will be used to match the HMS Kudu table entries. The by-name map
   // will be used to match against legacy Impala/Kudu HMS table entries.
@@ -385,6 +387,9 @@ Status AnalyzeCatalogs(const string& master_addrs,
   {
     vector<string> kudu_table_names;
     RETURN_NOT_OK(kudu_client->ListTables(&kudu_table_names));
+    if (kudu_catalog_count) {
+      *kudu_catalog_count = kudu_table_names.size();
+    }
     for (const string& kudu_table_name : kudu_table_names) {
       shared_ptr<KuduTable> kudu_table;
       // TODO(dan): When the error is NotFound, prepend an admonishment about not
@@ -405,6 +410,9 @@ Status AnalyzeCatalogs(const string& master_addrs,
   {
     vector<hive::Table> hms_tables;
     RETURN_NOT_OK(hms_catalog->GetKuduTables(&hms_tables));
+    if (hms_catalog_count) {
+      *hms_catalog_count = hms_tables.size();
+    }
     for (hive::Table& hms_table : hms_tables) {
       // If the addresses in the HMS entry don't overlap at all with the
       // expected addresses, the entry is likely from another Kudu cluster.
@@ -626,8 +634,14 @@ Status FixHmsMetadata(const RunnerContext& context) {
   RETURN_NOT_OK(Init(context, &kudu_client, &hms_catalog, &master_addrs));
 
   CatalogReport report;
-  RETURN_NOT_OK(AnalyzeCatalogs(master_addrs, hms_catalog.get(), kudu_client.get(), &report));
-
+  int kudu_catalog_count = 0;
+  int hms_catalog_count = 0;
+  RETURN_NOT_OK(AnalyzeCatalogs(master_addrs, hms_catalog.get(), kudu_client.get(), &report,
+                                &kudu_catalog_count, &hms_catalog_count));
+  if (FLAGS_dryrun && kudu_catalog_count == 0) {
+    LOG(INFO) << "NOTE: There are zero kudu tables listed. If the cluster indeed has kudu tables "
+                 "please re-run the command with right credentials." << endl;
+  }
   bool success = true;
 
   if (FLAGS_drop_orphan_hms_tables) {
@@ -850,7 +864,10 @@ Status FixHmsMetadata(const RunnerContext& context) {
       }
     }
   }
-
+  LOG(INFO) << Substitute("Number of Kudu tables found in Kudu master catalog: $0",
+                          kudu_catalog_count) << endl;
+  LOG(INFO) << Substitute("Number of Kudu tables found in HMS catalog: $0", hms_catalog_count)
+            << endl;
   if (FLAGS_dryrun || success) {
     return Status::OK();
   }