You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by wz...@apache.org on 2024/01/11 18:08:01 UTC

(impala) 05/05: IMPALA-12702: Show reduced cardinality estimation in ExecSummary

This is an automated email from the ASF dual-hosted git repository.

wzhou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit a2b8aed2c2d4c1bb25ed9626a2b014b79ec741ad
Author: Riza Suminto <ri...@cloudera.com>
AuthorDate: Wed Jan 10 10:16:02 2024 -0800

    IMPALA-12702: Show reduced cardinality estimation in ExecSummary
    
    In the query profile, the cardinality reduction from IMPALA-12018 is
    highlighted in the Plan section but is missing from the ExecSummary
    section. This patch changes the ExecSummary to show the reduced
    cardinality estimation if it is set.
    
    Testing:
    - Add TestObservability::test_reduced_cardinality_by_filter
    
    Change-Id: If1f51ce585a1cb66e518b725686ab3076ffa8168
    Reviewed-on: http://gerrit.cloudera.org:8080/20879
    Reviewed-by: Wenzhe Zhou <wz...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../main/java/org/apache/impala/planner/PlanNode.java  |  3 ++-
 .../queries/QueryTest/runtime_filters.test             |  6 +++---
 tests/query_test/test_observability.py                 | 18 ++++++++++++++++++
 3 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/planner/PlanNode.java b/fe/src/main/java/org/apache/impala/planner/PlanNode.java
index bdd2fbcb0..e14195f12 100644
--- a/fe/src/main/java/org/apache/impala/planner/PlanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/PlanNode.java
@@ -498,7 +498,8 @@ abstract public class PlanNode extends TreeNode<PlanNode> {
     msg.limit = limit_;
 
     TExecStats estimatedStats = new TExecStats();
-    estimatedStats.setCardinality(cardinality_);
+    estimatedStats.setCardinality(
+        filteredCardinality_ > -1 ? filteredCardinality_ : cardinality_);
     estimatedStats.setMemory_used(nodeResourceProfile_.getMemEstimateBytes());
     msg.setLabel(getDisplayLabel());
     msg.setLabel_detail(getDisplayLabelDetail());
diff --git a/testdata/workloads/functional-query/queries/QueryTest/runtime_filters.test b/testdata/workloads/functional-query/queries/QueryTest/runtime_filters.test
index b06b39b3c..00c6c96a6 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/runtime_filters.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/runtime_filters.test
@@ -27,7 +27,7 @@ on p.month = b.int_col and b.month = 1 and b.string_col = "1"
 ---- RUNTIME_PROFILE
 aggregation(SUM, Files rejected): 22
 ---- RUNTIME_PROFILE: table_format=kudu
-row_regex: 00:SCAN KUDU.*s[ ]+620[ ]+7.30K.*
+row_regex: 00:SCAN KUDU.*s[ ]+620[ ]+608.*
 ====
 
 
@@ -59,7 +59,7 @@ on p.month = b.int_col and b.month = 1 and b.string_col = "1"
 ---- RUNTIME_PROFILE
 aggregation(SUM, Files rejected): 22
 ---- RUNTIME_PROFILE: table_format=kudu
-row_regex: 00:SCAN KUDU.*s[ ]+620[ ]+7.30K.*
+row_regex: 00:SCAN KUDU.*s[ ]+620[ ]+608.*
 ====
 
 
@@ -321,7 +321,7 @@ with t1 as (select month x, bigint_col y from alltypes limit 7301),
 ---- RUNTIME_PROFILE
 aggregation(SUM, Files rejected): 22
 ---- RUNTIME_PROFILE: table_format=kudu
-row_regex: 00:SCAN KUDU.*s[ ]+620[ ]+7.30K.*
+row_regex: 00:SCAN KUDU.*s[ ]+620[ ]+1.82K.*
 ====
 
 
diff --git a/tests/query_test/test_observability.py b/tests/query_test/test_observability.py
index d82b3b0d7..597dea515 100644
--- a/tests/query_test/test_observability.py
+++ b/tests/query_test/test_observability.py
@@ -872,6 +872,24 @@ class TestObservability(ImpalaTestSuite):
     assert len(re.findall('Single node plan created:', runtime_profile, re.M)) == 2
     assert len(re.findall('Distributed plan created:', runtime_profile, re.M)) == 2
 
+  def test_reduced_cardinality_by_filter(self):
+    """IMPALA-12702: Check that ExecSummary shows the reduced cardinality estimation."""
+    query_opts = {'compute_processing_cost': True}
+    query = """select STRAIGHT_JOIN count(*) from
+        (select l_orderkey from tpch_parquet.lineitem) a
+        join (select o_orderkey, o_custkey from tpch_parquet.orders) l1
+          on a.l_orderkey = l1.o_orderkey
+        where l1.o_custkey < 1000"""
+    result = self.execute_query(query, query_opts)
+    scan = result.exec_summary[10]
+    assert scan['operator'] == '00:SCAN HDFS'
+    assert scan['num_rows'] == 39563
+    assert scan['est_num_rows'] == 575771
+    assert scan['detail'] == 'tpch_parquet.lineitem'
+    runtime_profile = result.runtime_profile
+    assert "cardinality=575.77K(filtered from 6.00M)" in runtime_profile
+
+
 class TestQueryStates(ImpalaTestSuite):
   """Test that the 'Query State' and 'Impala Query State' are set correctly in the
   runtime profile."""