You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2019/08/11 02:18:04 UTC

[impala] 03/03: IMPALA-8848: fix UNION missing input cardinality bug

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 2066b72bc4b43fd57679c37145eecebaa6be8b27
Author: Tim Armstrong <ta...@cloudera.com>
AuthorDate: Thu Aug 8 13:52:00 2019 -0700

    IMPALA-8848: fix UNION missing input cardinality bug
    
    If a UNION has children, and none of those children has
    a known cardinality, then we can make no reasonable
    estimate of the output cardinality, so the planner
    should consider the output cardinality to be unknown.
    
    The previous behaviour was to report a cardinality of
    0, which is unsafe because the planner may make further
    decisions under the incorrect assumption that the output
    of the UNION is tiny.
    
    Testing:
    An existing CardinalityTest already tested this but had
    the wrong estimate.
    
    Change-Id: Ic3ed670ffb685d8ff24824933ca303f3219737bb
    Reviewed-on: http://gerrit.cloudera.org:8080/14036
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../main/java/org/apache/impala/planner/UnionNode.java   | 15 ++++++++++++---
 .../java/org/apache/impala/planner/CardinalityTest.java  | 16 +++++++++++++++-
 2 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/planner/UnionNode.java b/fe/src/main/java/org/apache/impala/planner/UnionNode.java
index d119ac7..cfdd076 100644
--- a/fe/src/main/java/org/apache/impala/planner/UnionNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/UnionNode.java
@@ -110,15 +110,24 @@ public class UnionNode extends PlanNode {
   @Override
   public void computeStats(Analyzer analyzer) {
     super.computeStats(analyzer);
-    cardinality_ = constExprLists_.size();
+    long totalChildCardinality = 0;
+    boolean haveChildWithCardinality = false;
     for (PlanNode child: children_) {
       // ignore missing child cardinality info in the hope it won't matter enough
       // to change the planning outcome
-      if (child.cardinality_ > 0) {
-        cardinality_ = checkedAdd(cardinality_, child.cardinality_);
+      if (child.cardinality_ >= 0) {
+        totalChildCardinality = checkedAdd(totalChildCardinality, child.cardinality_);
+        haveChildWithCardinality = true;
       }
       numNodes_ = Math.max(child.getNumNodes(), numNodes_);
     }
+    // Consider estimate valid if we have at least one child with known cardinality, or
+    // only constant values.
+    if (haveChildWithCardinality || children_.size() == 0) {
+      cardinality_ = checkedAdd(totalChildCardinality, constExprLists_.size());
+    } else {
+      cardinality_ = -1;
+    }
     // The number of nodes of a union node is -1 (invalid) if all the referenced tables
     // are inline views (e.g. select 1 FROM (VALUES(1 x, 1 y)) a FULL OUTER JOIN
     // (VALUES(1 x, 1 y)) b ON (a.x = b.y)). We need to set the correct value.
diff --git a/fe/src/test/java/org/apache/impala/planner/CardinalityTest.java b/fe/src/test/java/org/apache/impala/planner/CardinalityTest.java
index 441c010..2b71ee3 100644
--- a/fe/src/test/java/org/apache/impala/planner/CardinalityTest.java
+++ b/fe/src/test/java/org/apache/impala/planner/CardinalityTest.java
@@ -712,9 +712,23 @@ public class CardinalityTest extends PlannerTestBase {
     // There is no available statistics in functional.tinytable.
     // True cardinality of functional.tinytable is 3.
     String subQuery = "SELECT * FROM functional.tinytable";
-    verifyApproxCardinality(subQuery + " UNION ALL " + subQuery, 0, true,
+    verifyApproxCardinality(subQuery + " UNION ALL " + subQuery, -1, true,
         ImmutableSet.of(PlannerTestOption.DISABLE_HDFS_NUM_ROWS_ESTIMATE),
         path, UnionNode.class);
+
+    // Same test, except with some constant expressions added to the union.
+    // The output cardinality still cannot be estimated.
+    verifyApproxCardinality(
+        subQuery + " UNION ALL " + subQuery + " UNION ALL SELECT 'a', 'b'", -1, true,
+        ImmutableSet.of(PlannerTestOption.DISABLE_HDFS_NUM_ROWS_ESTIMATE),
+        path, UnionNode.class);
+
+    // If one branch of the union has known cardinality, that value is used.
+    // alltypestiny has 8 rows and computed stats.
+    verifyApproxCardinality(subQuery + " UNION ALL " +
+        "SELECT string_col, date_string_col from functional.alltypestiny",
+        8, true, ImmutableSet.of(PlannerTestOption.DISABLE_HDFS_NUM_ROWS_ESTIMATE),
+        path, UnionNode.class);
   }
 
   @Test