You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by bi...@apache.org on 2018/11/08 09:12:31 UTC
[3/4] impala git commit: IMPALA-7528: Fix division by zero when
computing cardinalities
IMPALA-7528: Fix division by zero when computing cardinalities
This patch fixes a case where there can be a division by zero when
computing cardinalities of many to many joins on NULL columns.
Testing:
Added a planner test case
Change-Id: Ieedd51d3ad6131a4ea2a5883f05104e6a0f2cd14
Reviewed-on: http://gerrit.cloudera.org:8080/11901
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/898c5158
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/898c5158
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/898c5158
Branch: refs/heads/master
Commit: 898c515882dc64e1c98bff073f5529ea63a20cfe
Parents: d1aa1c0
Author: Bikramjeet Vig <bi...@cloudera.com>
Authored: Tue Nov 6 23:44:19 2018 -0800
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Thu Nov 8 05:42:45 2018 +0000
----------------------------------------------------------------------
fe/src/main/java/org/apache/impala/planner/JoinNode.java | 7 ++++++-
fe/src/test/java/org/apache/impala/planner/PlannerTest.java | 9 +++++++++
testdata/bin/compute-table-stats.sh | 2 +-
.../functional-planner/queries/PlannerTest/joins.test | 4 ++--
4 files changed, 18 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/impala/blob/898c5158/fe/src/main/java/org/apache/impala/planner/JoinNode.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/JoinNode.java b/fe/src/main/java/org/apache/impala/planner/JoinNode.java
index cc50318..96f93cd 100644
--- a/fe/src/main/java/org/apache/impala/planner/JoinNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/JoinNode.java
@@ -365,7 +365,12 @@ public abstract class JoinNode extends PlanNode {
if (slots.lhsNumRows() > lhsCard) lhsAdjNdv *= lhsCard / slots.lhsNumRows();
double rhsAdjNdv = slots.rhsNdv();
if (slots.rhsNumRows() > rhsCard) rhsAdjNdv *= rhsCard / slots.rhsNumRows();
- long joinCard = Math.round((lhsCard / Math.max(lhsAdjNdv, rhsAdjNdv)) * rhsCard);
+ // A lower limit of 1 on the max Adjusted Ndv ensures we don't estimate
+ // cardinality more than the max possible. This also handles the case of
+ // null columns on both sides having an Ndv of zero (which would change
+ // after IMPALA-7310 is fixed).
+ long joinCard = Math.round((lhsCard / Math.max(1, Math.max(lhsAdjNdv, rhsAdjNdv))) *
+ rhsCard);
if (result == -1) {
result = joinCard;
} else {
http://git-wip-us.apache.org/repos/asf/impala/blob/898c5158/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
index 2ec4b15..82235ed 100644
--- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
+++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
@@ -703,4 +703,13 @@ public class PlannerTest extends PlannerTestBase {
assertEquals(HBaseScanNode.memoryEstimateForFetchingColumns(largeColumnList),
8 * 1024 * 1024);
}
+
+ @Test
+ public void testNullColumnJoinCardinality() throws ImpalaException {
+ // IMPALA-7565: Make sure there is no division by zero during cardinality calculation
+ // in a many to many join on null columns (ndv = 0).
+ String query = "select * from functional.nulltable t1 "
+ + "inner join [shuffle] functional.nulltable t2 on t1.d = t2.d";
+ checkCardinality(query, 1, 1);
+ }
}
http://git-wip-us.apache.org/repos/asf/impala/blob/898c5158/testdata/bin/compute-table-stats.sh
----------------------------------------------------------------------
diff --git a/testdata/bin/compute-table-stats.sh b/testdata/bin/compute-table-stats.sh
index 08c7595..d6e6d22 100755
--- a/testdata/bin/compute-table-stats.sh
+++ b/testdata/bin/compute-table-stats.sh
@@ -33,7 +33,7 @@ COMPUTE_STATS_SCRIPT="${IMPALA_HOME}/tests/util/compute_table_stats.py --impalad
# Run compute stats over as many of the tables used in the Planner tests as possible.
${COMPUTE_STATS_SCRIPT} --db_names=functional\
--table_names="alltypes,alltypesagg,alltypesaggmultifilesnopart,alltypesaggnonulls,
- alltypessmall,alltypestiny,jointbl,dimtbl,stringpartitionkey"
+ alltypessmall,alltypestiny,jointbl,dimtbl,stringpartitionkey,nulltable"
# We cannot load HBase on s3 and isilon yet.
if [ "${TARGET_FILESYSTEM}" = "hdfs" ]; then
http://git-wip-us.apache.org/repos/asf/impala/blob/898c5158/testdata/workloads/functional-planner/queries/PlannerTest/joins.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/joins.test b/testdata/workloads/functional-planner/queries/PlannerTest/joins.test
index 4c5d7ab..b681acb 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/joins.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/joins.test
@@ -2387,10 +2387,10 @@ PLAN-ROOT SINK
03:NESTED LOOP JOIN [INNER JOIN]
| predicates: t1.d IS DISTINCT FROM t2.d
|
-|--01:SCAN HDFS [functional.nulltable t2]
+|--00:SCAN HDFS [functional.nulltable t1]
| partitions=1/1 files=1 size=18B
|
-00:SCAN HDFS [functional.nulltable t1]
+01:SCAN HDFS [functional.nulltable t2]
partitions=1/1 files=1 size=18B
====
# IMPALA-3450: limits on join nodes are reflected in cardinality estimates. The test for