You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2023/09/25 16:22:18 UTC
[impala] branch master updated: IMPALA-8675: Remove db/table count metrics from impalad in LocalCatalog mode
This is an automated email from the ASF dual-hosted git repository.
joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 6af8154ec IMPALA-8675: Remove db/table count metrics from impalad in LocalCatalog mode
6af8154ec is described below
commit 6af8154ecb7795506d1afebf044c89cf2cf83f8e
Author: stiga-huang <hu...@gmail.com>
AuthorDate: Thu Sep 21 09:45:36 2023 +0800
IMPALA-8675: Remove db/table count metrics from impalad in LocalCatalog mode
In the /metrics webUI, coordinator shows metrics of
"catalog.num-databases" and "catalog.num-tables" for its local catalog
cache. They are updated at the end of each query execution, via
Frontend.getCatalogMetrics().
In LocalCatalog mode, there is no need for every coordinator to have the
full list of tables of every database. However, getCatalogMetrics ends
up iterating over every DB and fetching these lists (if uncached) in
order to provide a count. This introduces unnecessary catalog RPCs at
the end of each query execution. When catalogd is slow/hanging in
processing such coordinator RPCs, simple queries will also be hanging.
This patch removes tracking the db/table count metrics from coordinator
side in LocalCatalog mode. They will always be -1. The count isn't
particularly relevant – if someone wants to keep track of the size of
their catalog they are better off looking at that metric from catalogd.
Tests:
- test_non_compact_catalog_topic_updates uses these two metrics to
detect new catalog updates. Changed it to use "catalog.curr-version"
instead.
Change-Id: I02a409b7b24577f75d7c439c85bc3491ec7c518c
Reviewed-on: http://gerrit.cloudera.org:8080/20500
Reviewed-by: Wenzhe Zhou <wz...@cloudera.com>
Tested-by: Wenzhe Zhou <wz...@cloudera.com>
---
common/thrift/metrics.json | 4 ++--
.../main/java/org/apache/impala/service/Frontend.java | 17 ++++++++++++-----
tests/custom_cluster/test_compact_catalog_updates.py | 10 ++++++----
tests/custom_cluster/test_local_catalog.py | 2 +-
4 files changed, 21 insertions(+), 12 deletions(-)
diff --git a/common/thrift/metrics.json b/common/thrift/metrics.json
index d64ed31fb..d9391818c 100644
--- a/common/thrift/metrics.json
+++ b/common/thrift/metrics.json
@@ -330,7 +330,7 @@
"key": "catalog-server.topic-processing-time-s"
},
{
- "description": "The number of databases in the catalog.",
+ "description": "The number of databases in the catalog. Untracked in LocalCatalog mode.",
"contexts": [
"IMPALAD"
],
@@ -340,7 +340,7 @@
"key": "catalog.num-databases"
},
{
- "description": "The number of tables in the catalog.",
+ "description": "The number of tables in the catalog. Untracked in LocalCatalog mode.",
"contexts": [
"IMPALAD"
],
diff --git a/fe/src/main/java/org/apache/impala/service/Frontend.java b/fe/src/main/java/org/apache/impala/service/Frontend.java
index 794a65a79..398a14854 100644
--- a/fe/src/main/java/org/apache/impala/service/Frontend.java
+++ b/fe/src/main/java/org/apache/impala/service/Frontend.java
@@ -1051,13 +1051,20 @@ public class Frontend {
return planCtx.getExplainString();
}
- public TGetCatalogMetricsResult getCatalogMetrics() throws ImpalaException {
+ public TGetCatalogMetricsResult getCatalogMetrics() {
TGetCatalogMetricsResult resp = new TGetCatalogMetricsResult();
- for (FeDb db : getCatalog().getDbs(PatternMatcher.MATCHER_MATCH_ALL)) {
- resp.num_dbs++;
- resp.num_tables += db.getAllTableNames().size();
+ if (BackendConfig.INSTANCE.getBackendCfg().use_local_catalog) {
+ // Don't track these two metrics in LocalCatalog mode since they might introduce
+ // catalogd RPCs when the db list or some table lists are not cached.
+ resp.num_dbs = -1;
+ resp.num_tables = -1;
+ FeCatalogUtils.populateCacheMetrics(getCatalog(), resp);
+ } else {
+ for (FeDb db : getCatalog().getDbs(PatternMatcher.MATCHER_MATCH_ALL)) {
+ resp.num_dbs++;
+ resp.num_tables += db.getAllTableNames().size();
+ }
}
- FeCatalogUtils.populateCacheMetrics(getCatalog(), resp);
return resp;
}
diff --git a/tests/custom_cluster/test_compact_catalog_updates.py b/tests/custom_cluster/test_compact_catalog_updates.py
index 9ca1dc92a..de3a6c50d 100644
--- a/tests/custom_cluster/test_compact_catalog_updates.py
+++ b/tests/custom_cluster/test_compact_catalog_updates.py
@@ -43,9 +43,9 @@ class TestCompactCatalogUpdates(CustomClusterTestSuite):
try:
# Check that initial catalop update topic has been received
impalad1 = self.cluster.impalads[0]
- assert impalad1.service.get_metric_value("catalog.num-tables") > 0
+ assert impalad1.service.get_metric_value("catalog.curr-version") > 0
impalad2 = self.cluster.impalads[1]
- assert impalad2.service.get_metric_value("catalog.num-tables") > 0
+ assert impalad2.service.get_metric_value("catalog.curr-version") > 0
client1 = impalad1.service.create_beeswax_client()
client2 = impalad2.service.create_beeswax_client()
@@ -55,10 +55,12 @@ class TestCompactCatalogUpdates(CustomClusterTestSuite):
result = client2.execute("select count(*) from functional.alltypes")
assert result.data[0] == "7300"
+ prev_v1 = impalad1.service.get_metric_value("catalog.curr-version")
+ prev_v2 = impalad2.service.get_metric_value("catalog.curr-version")
self.execute_query_expect_success(client1, "invalidate metadata", query_options)
self.execute_query_expect_success(client2, "show databases")
- assert impalad1.service.get_metric_value("catalog.num-databases") > 0
- assert impalad2.service.get_metric_value("catalog.num-databases") > 0
+ assert impalad1.service.get_metric_value("catalog.curr-version") > prev_v1
+ assert impalad2.service.get_metric_value("catalog.curr-version") > prev_v2
finally:
client1.close()
client2.close()
diff --git a/tests/custom_cluster/test_local_catalog.py b/tests/custom_cluster/test_local_catalog.py
index 050069f6c..04fd2d33f 100644
--- a/tests/custom_cluster/test_local_catalog.py
+++ b/tests/custom_cluster/test_local_catalog.py
@@ -517,7 +517,7 @@ class TestObservability(CustomClusterTestSuite):
cache_entry_median_size = cache_metrics[cache_entry_median_size_key]
cache_entry_99th_size = cache_metrics[cache_entry_99th_size_key]
- assert cache_entry_median_size > 300 and cache_entry_median_size < 1000
+ assert cache_entry_median_size > 300 and cache_entry_median_size < 3000
assert cache_entry_99th_size > 12500 and cache_entry_99th_size < 19000
cache_hit_count_prev_run = cache_hit_count