You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2019/08/11 02:18:04 UTC
[impala] 03/03: IMPALA-8848: fix UNION missing input cardinality bug
This is an automated email from the ASF dual-hosted git repository.
tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 2066b72bc4b43fd57679c37145eecebaa6be8b27
Author: Tim Armstrong <ta...@cloudera.com>
AuthorDate: Thu Aug 8 13:52:00 2019 -0700
IMPALA-8848: fix UNION missing input cardinality bug
If a UNION has children, and none of those children has
a known cardinality, then we can make no reasonable
estimate of the output cardinality, so the planner
should consider the output cardinality to be unknown.
The previous behaviour was to report a cardinality of
0, which is unsafe because the planner may make further
decisions under the incorrect assumption that the output
of the UNION is tiny.
Testing:
An existing CardinalityTest already tested this but had
the wrong estimate.
Change-Id: Ic3ed670ffb685d8ff24824933ca303f3219737bb
Reviewed-on: http://gerrit.cloudera.org:8080/14036
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
.../main/java/org/apache/impala/planner/UnionNode.java | 15 ++++++++++++---
.../java/org/apache/impala/planner/CardinalityTest.java | 16 +++++++++++++++-
2 files changed, 27 insertions(+), 4 deletions(-)
diff --git a/fe/src/main/java/org/apache/impala/planner/UnionNode.java b/fe/src/main/java/org/apache/impala/planner/UnionNode.java
index d119ac7..cfdd076 100644
--- a/fe/src/main/java/org/apache/impala/planner/UnionNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/UnionNode.java
@@ -110,15 +110,24 @@ public class UnionNode extends PlanNode {
@Override
public void computeStats(Analyzer analyzer) {
super.computeStats(analyzer);
- cardinality_ = constExprLists_.size();
+ long totalChildCardinality = 0;
+ boolean haveChildWithCardinality = false;
for (PlanNode child: children_) {
// ignore missing child cardinality info in the hope it won't matter enough
// to change the planning outcome
- if (child.cardinality_ > 0) {
- cardinality_ = checkedAdd(cardinality_, child.cardinality_);
+ if (child.cardinality_ >= 0) {
+ totalChildCardinality = checkedAdd(totalChildCardinality, child.cardinality_);
+ haveChildWithCardinality = true;
}
numNodes_ = Math.max(child.getNumNodes(), numNodes_);
}
+ // Consider estimate valid if we have at least one child with known cardinality, or
+ // only constant values.
+ if (haveChildWithCardinality || children_.size() == 0) {
+ cardinality_ = checkedAdd(totalChildCardinality, constExprLists_.size());
+ } else {
+ cardinality_ = -1;
+ }
// The number of nodes of a union node is -1 (invalid) if all the referenced tables
// are inline views (e.g. select 1 FROM (VALUES(1 x, 1 y)) a FULL OUTER JOIN
// (VALUES(1 x, 1 y)) b ON (a.x = b.y)). We need to set the correct value.
diff --git a/fe/src/test/java/org/apache/impala/planner/CardinalityTest.java b/fe/src/test/java/org/apache/impala/planner/CardinalityTest.java
index 441c010..2b71ee3 100644
--- a/fe/src/test/java/org/apache/impala/planner/CardinalityTest.java
+++ b/fe/src/test/java/org/apache/impala/planner/CardinalityTest.java
@@ -712,9 +712,23 @@ public class CardinalityTest extends PlannerTestBase {
// There is no available statistics in functional.tinytable.
// True cardinality of functional.tinytable is 3.
String subQuery = "SELECT * FROM functional.tinytable";
- verifyApproxCardinality(subQuery + " UNION ALL " + subQuery, 0, true,
+ verifyApproxCardinality(subQuery + " UNION ALL " + subQuery, -1, true,
ImmutableSet.of(PlannerTestOption.DISABLE_HDFS_NUM_ROWS_ESTIMATE),
path, UnionNode.class);
+
+ // Same test, except with some constant expressions added to the union.
+ // The output cardinality still cannot be estimated.
+ verifyApproxCardinality(
+ subQuery + " UNION ALL " + subQuery + " UNION ALL SELECT 'a', 'b'", -1, true,
+ ImmutableSet.of(PlannerTestOption.DISABLE_HDFS_NUM_ROWS_ESTIMATE),
+ path, UnionNode.class);
+
+ // If one branch of the union has known cardinality, that value is used.
+ // alltypestiny has 8 rows and computed stats.
+ verifyApproxCardinality(subQuery + " UNION ALL " +
+ "SELECT string_col, date_string_col from functional.alltypestiny",
+ 8, true, ImmutableSet.of(PlannerTestOption.DISABLE_HDFS_NUM_ROWS_ESTIMATE),
+ path, UnionNode.class);
}
@Test