You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by wz...@apache.org on 2024/01/11 18:08:01 UTC
(impala) 05/05: IMPALA-12702: Show reduced cardinality estimation in ExecSummary
This is an automated email from the ASF dual-hosted git repository.
wzhou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit a2b8aed2c2d4c1bb25ed9626a2b014b79ec741ad
Author: Riza Suminto <ri...@cloudera.com>
AuthorDate: Wed Jan 10 10:16:02 2024 -0800
IMPALA-12702: Show reduced cardinality estimation in ExecSummary
In the query profile, the cardinality reduction from IMPALA-12018 is
highlighted in the Plan section, but missing from the ExecSummary section.
This patch changes the ExecSummary to show the reduced cardinality
estimation if it is set.
Testing:
- Add TestObservability::test_reduced_cardinality_by_filter
Change-Id: If1f51ce585a1cb66e518b725686ab3076ffa8168
Reviewed-on: http://gerrit.cloudera.org:8080/20879
Reviewed-by: Wenzhe Zhou <wz...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
.../main/java/org/apache/impala/planner/PlanNode.java | 3 ++-
.../queries/QueryTest/runtime_filters.test | 6 +++---
tests/query_test/test_observability.py | 18 ++++++++++++++++++
3 files changed, 23 insertions(+), 4 deletions(-)
diff --git a/fe/src/main/java/org/apache/impala/planner/PlanNode.java b/fe/src/main/java/org/apache/impala/planner/PlanNode.java
index bdd2fbcb0..e14195f12 100644
--- a/fe/src/main/java/org/apache/impala/planner/PlanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/PlanNode.java
@@ -498,7 +498,8 @@ abstract public class PlanNode extends TreeNode<PlanNode> {
msg.limit = limit_;
TExecStats estimatedStats = new TExecStats();
- estimatedStats.setCardinality(cardinality_);
+ estimatedStats.setCardinality(
+ filteredCardinality_ > -1 ? filteredCardinality_ : cardinality_);
estimatedStats.setMemory_used(nodeResourceProfile_.getMemEstimateBytes());
msg.setLabel(getDisplayLabel());
msg.setLabel_detail(getDisplayLabelDetail());
diff --git a/testdata/workloads/functional-query/queries/QueryTest/runtime_filters.test b/testdata/workloads/functional-query/queries/QueryTest/runtime_filters.test
index b06b39b3c..00c6c96a6 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/runtime_filters.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/runtime_filters.test
@@ -27,7 +27,7 @@ on p.month = b.int_col and b.month = 1 and b.string_col = "1"
---- RUNTIME_PROFILE
aggregation(SUM, Files rejected): 22
---- RUNTIME_PROFILE: table_format=kudu
-row_regex: 00:SCAN KUDU.*s[ ]+620[ ]+7.30K.*
+row_regex: 00:SCAN KUDU.*s[ ]+620[ ]+608.*
====
@@ -59,7 +59,7 @@ on p.month = b.int_col and b.month = 1 and b.string_col = "1"
---- RUNTIME_PROFILE
aggregation(SUM, Files rejected): 22
---- RUNTIME_PROFILE: table_format=kudu
-row_regex: 00:SCAN KUDU.*s[ ]+620[ ]+7.30K.*
+row_regex: 00:SCAN KUDU.*s[ ]+620[ ]+608.*
====
@@ -321,7 +321,7 @@ with t1 as (select month x, bigint_col y from alltypes limit 7301),
---- RUNTIME_PROFILE
aggregation(SUM, Files rejected): 22
---- RUNTIME_PROFILE: table_format=kudu
-row_regex: 00:SCAN KUDU.*s[ ]+620[ ]+7.30K.*
+row_regex: 00:SCAN KUDU.*s[ ]+620[ ]+1.82K.*
====
diff --git a/tests/query_test/test_observability.py b/tests/query_test/test_observability.py
index d82b3b0d7..597dea515 100644
--- a/tests/query_test/test_observability.py
+++ b/tests/query_test/test_observability.py
@@ -872,6 +872,24 @@ class TestObservability(ImpalaTestSuite):
assert len(re.findall('Single node plan created:', runtime_profile, re.M)) == 2
assert len(re.findall('Distributed plan created:', runtime_profile, re.M)) == 2
+ def test_reduced_cardinality_by_filter(self):
+ """IMPALA-12702: Check that ExecSummary shows the reduced cardinality estimation."""
+ query_opts = {'compute_processing_cost': True}
+ query = """select STRAIGHT_JOIN count(*) from
+ (select l_orderkey from tpch_parquet.lineitem) a
+ join (select o_orderkey, o_custkey from tpch_parquet.orders) l1
+ on a.l_orderkey = l1.o_orderkey
+ where l1.o_custkey < 1000"""
+ result = self.execute_query(query, query_opts)
+ scan = result.exec_summary[10]
+ assert scan['operator'] == '00:SCAN HDFS'
+ assert scan['num_rows'] == 39563
+ assert scan['est_num_rows'] == 575771
+ assert scan['detail'] == 'tpch_parquet.lineitem'
+ runtime_profile = result.runtime_profile
+ assert "cardinality=575.77K(filtered from 6.00M)" in runtime_profile
+
+
class TestQueryStates(ImpalaTestSuite):
"""Test that the 'Query State' and 'Impala Query State' are set correctly in the
runtime profile."""