You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2023/09/25 16:22:18 UTC

[impala] branch master updated: IMPALA-8675: Remove db/table count metrics from impalad in LocalCatalog mode

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 6af8154ec IMPALA-8675: Remove db/table count metrics from impalad in LocalCatalog mode
6af8154ec is described below

commit 6af8154ecb7795506d1afebf044c89cf2cf83f8e
Author: stiga-huang <hu...@gmail.com>
AuthorDate: Thu Sep 21 09:45:36 2023 +0800

    IMPALA-8675: Remove db/table count metrics from impalad in LocalCatalog mode
    
    In the /metrics webUI, coordinator shows metrics of
    "catalog.num-databases" and "catalog.num-tables" for its local catalog
    cache. They are updated at the end of each query execution, via
    Frontend.getCatalogMetrics().
    
    In LocalCatalog mode, there is no need for every coordinator to have the
    full list of tables of every database. However, getCatalogMetrics ends
    up iterating over every DB and fetching these lists (if uncached) in
    order to provide a count. This introduces unnecessary catalog RPCs at
    the end of each query execution. When catalogd is slow/hanging in
    processing such coordinator RPCs, simple queries will also be hanging.
    
    This patch removes tracking the db/table count metrics from coordinator
    side in LocalCatalog mode. They will always be -1. The count isn't
    particularly relevant – if someone wants to keep track of the size of
    their catalog they are better off looking at that metric from catalogd.
    
    Tests:
     - test_non_compact_catalog_topic_updates uses these two metrics to
       detect new catalog updates. Changed it to use "catalog.curr-version"
       instead.
    
    Change-Id: I02a409b7b24577f75d7c439c85bc3491ec7c518c
    Reviewed-on: http://gerrit.cloudera.org:8080/20500
    Reviewed-by: Wenzhe Zhou <wz...@cloudera.com>
    Tested-by: Wenzhe Zhou <wz...@cloudera.com>
---
 common/thrift/metrics.json                              |  4 ++--
 .../main/java/org/apache/impala/service/Frontend.java   | 17 ++++++++++++-----
 tests/custom_cluster/test_compact_catalog_updates.py    | 10 ++++++----
 tests/custom_cluster/test_local_catalog.py              |  2 +-
 4 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/common/thrift/metrics.json b/common/thrift/metrics.json
index d64ed31fb..d9391818c 100644
--- a/common/thrift/metrics.json
+++ b/common/thrift/metrics.json
@@ -330,7 +330,7 @@
     "key": "catalog-server.topic-processing-time-s"
   },
   {
-    "description": "The number of databases in the catalog.",
+    "description": "The number of databases in the catalog. Untracked in LocalCatalog mode.",
     "contexts": [
       "IMPALAD"
     ],
@@ -340,7 +340,7 @@
     "key": "catalog.num-databases"
   },
   {
-    "description": "The number of tables in the catalog.",
+    "description": "The number of tables in the catalog. Untracked in LocalCatalog mode.",
     "contexts": [
       "IMPALAD"
     ],
diff --git a/fe/src/main/java/org/apache/impala/service/Frontend.java b/fe/src/main/java/org/apache/impala/service/Frontend.java
index 794a65a79..398a14854 100644
--- a/fe/src/main/java/org/apache/impala/service/Frontend.java
+++ b/fe/src/main/java/org/apache/impala/service/Frontend.java
@@ -1051,13 +1051,20 @@ public class Frontend {
     return planCtx.getExplainString();
   }
 
-  public TGetCatalogMetricsResult getCatalogMetrics() throws ImpalaException {
+  public TGetCatalogMetricsResult getCatalogMetrics() {
     TGetCatalogMetricsResult resp = new TGetCatalogMetricsResult();
-    for (FeDb db : getCatalog().getDbs(PatternMatcher.MATCHER_MATCH_ALL)) {
-      resp.num_dbs++;
-      resp.num_tables += db.getAllTableNames().size();
+    if (BackendConfig.INSTANCE.getBackendCfg().use_local_catalog) {
+      // Don't track these two metrics in LocalCatalog mode since they might introduce
+      // catalogd RPCs when the db list or some table lists are not cached.
+      resp.num_dbs = -1;
+      resp.num_tables = -1;
+      FeCatalogUtils.populateCacheMetrics(getCatalog(), resp);
+    } else {
+      for (FeDb db : getCatalog().getDbs(PatternMatcher.MATCHER_MATCH_ALL)) {
+        resp.num_dbs++;
+        resp.num_tables += db.getAllTableNames().size();
+      }
     }
-    FeCatalogUtils.populateCacheMetrics(getCatalog(), resp);
     return resp;
   }
 
diff --git a/tests/custom_cluster/test_compact_catalog_updates.py b/tests/custom_cluster/test_compact_catalog_updates.py
index 9ca1dc92a..de3a6c50d 100644
--- a/tests/custom_cluster/test_compact_catalog_updates.py
+++ b/tests/custom_cluster/test_compact_catalog_updates.py
@@ -43,9 +43,9 @@ class TestCompactCatalogUpdates(CustomClusterTestSuite):
     try:
       # Check that initial catalop update topic has been received
       impalad1 = self.cluster.impalads[0]
-      assert impalad1.service.get_metric_value("catalog.num-tables") > 0
+      assert impalad1.service.get_metric_value("catalog.curr-version") > 0
       impalad2 = self.cluster.impalads[1]
-      assert impalad2.service.get_metric_value("catalog.num-tables") > 0
+      assert impalad2.service.get_metric_value("catalog.curr-version") > 0
 
       client1 = impalad1.service.create_beeswax_client()
       client2 = impalad2.service.create_beeswax_client()
@@ -55,10 +55,12 @@ class TestCompactCatalogUpdates(CustomClusterTestSuite):
       result = client2.execute("select count(*) from functional.alltypes")
       assert result.data[0] == "7300"
 
+      prev_v1 = impalad1.service.get_metric_value("catalog.curr-version")
+      prev_v2 = impalad2.service.get_metric_value("catalog.curr-version")
       self.execute_query_expect_success(client1, "invalidate metadata", query_options)
       self.execute_query_expect_success(client2, "show databases")
-      assert impalad1.service.get_metric_value("catalog.num-databases") > 0
-      assert impalad2.service.get_metric_value("catalog.num-databases") > 0
+      assert impalad1.service.get_metric_value("catalog.curr-version") > prev_v1
+      assert impalad2.service.get_metric_value("catalog.curr-version") > prev_v2
     finally:
       client1.close()
       client2.close()
diff --git a/tests/custom_cluster/test_local_catalog.py b/tests/custom_cluster/test_local_catalog.py
index 050069f6c..04fd2d33f 100644
--- a/tests/custom_cluster/test_local_catalog.py
+++ b/tests/custom_cluster/test_local_catalog.py
@@ -517,7 +517,7 @@ class TestObservability(CustomClusterTestSuite):
 
           cache_entry_median_size = cache_metrics[cache_entry_median_size_key]
           cache_entry_99th_size = cache_metrics[cache_entry_99th_size_key]
-          assert cache_entry_median_size > 300 and cache_entry_median_size < 1000
+          assert cache_entry_median_size > 300 and cache_entry_median_size < 3000
           assert cache_entry_99th_size > 12500 and cache_entry_99th_size < 19000
 
           cache_hit_count_prev_run = cache_hit_count