You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by bi...@apache.org on 2018/11/08 09:12:31 UTC

[3/4] impala git commit: IMPALA-7528: Fix division by zero when computing cardinalities

IMPALA-7528: Fix division by zero when computing cardinalities

This patch fixes a case where there can be a division by zero when
computing cardinalities of many to many joins on NULL columns.

Testing:
Added a planner test case

Change-Id: Ieedd51d3ad6131a4ea2a5883f05104e6a0f2cd14
Reviewed-on: http://gerrit.cloudera.org:8080/11901
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/898c5158
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/898c5158
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/898c5158

Branch: refs/heads/master
Commit: 898c515882dc64e1c98bff073f5529ea63a20cfe
Parents: d1aa1c0
Author: Bikramjeet Vig <bi...@cloudera.com>
Authored: Tue Nov 6 23:44:19 2018 -0800
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Thu Nov 8 05:42:45 2018 +0000

----------------------------------------------------------------------
 fe/src/main/java/org/apache/impala/planner/JoinNode.java    | 7 ++++++-
 fe/src/test/java/org/apache/impala/planner/PlannerTest.java | 9 +++++++++
 testdata/bin/compute-table-stats.sh                         | 2 +-
 .../functional-planner/queries/PlannerTest/joins.test       | 4 ++--
 4 files changed, 18 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/898c5158/fe/src/main/java/org/apache/impala/planner/JoinNode.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/JoinNode.java b/fe/src/main/java/org/apache/impala/planner/JoinNode.java
index cc50318..96f93cd 100644
--- a/fe/src/main/java/org/apache/impala/planner/JoinNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/JoinNode.java
@@ -365,7 +365,12 @@ public abstract class JoinNode extends PlanNode {
       if (slots.lhsNumRows() > lhsCard) lhsAdjNdv *= lhsCard / slots.lhsNumRows();
       double rhsAdjNdv = slots.rhsNdv();
       if (slots.rhsNumRows() > rhsCard) rhsAdjNdv *= rhsCard / slots.rhsNumRows();
-      long joinCard = Math.round((lhsCard / Math.max(lhsAdjNdv, rhsAdjNdv)) * rhsCard);
+      // A lower limit of 1 on the max Adjusted Ndv ensures we don't estimate
+      // cardinality more than the max possible. This also handles the case of
+      // null columns on both sides having an Ndv of zero (which would change
+      // after IMPALA-7310 is fixed).
+      long joinCard = Math.round((lhsCard / Math.max(1, Math.max(lhsAdjNdv, rhsAdjNdv))) *
+          rhsCard);
       if (result == -1) {
         result = joinCard;
       } else {

http://git-wip-us.apache.org/repos/asf/impala/blob/898c5158/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
index 2ec4b15..82235ed 100644
--- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
+++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
@@ -703,4 +703,13 @@ public class PlannerTest extends PlannerTestBase {
     assertEquals(HBaseScanNode.memoryEstimateForFetchingColumns(largeColumnList),
         8 * 1024 * 1024);
   }
+
+  @Test
+  public void testNullColumnJoinCardinality() throws ImpalaException {
+    // IMPALA-7565: Make sure there is no division by zero during cardinality calculation
+    // in a many to many join on null columns (ndv = 0).
+    String query = "select * from functional.nulltable t1 "
+        + "inner join [shuffle] functional.nulltable t2 on t1.d = t2.d";
+    checkCardinality(query, 1, 1);
+  }
 }

http://git-wip-us.apache.org/repos/asf/impala/blob/898c5158/testdata/bin/compute-table-stats.sh
----------------------------------------------------------------------
diff --git a/testdata/bin/compute-table-stats.sh b/testdata/bin/compute-table-stats.sh
index 08c7595..d6e6d22 100755
--- a/testdata/bin/compute-table-stats.sh
+++ b/testdata/bin/compute-table-stats.sh
@@ -33,7 +33,7 @@ COMPUTE_STATS_SCRIPT="${IMPALA_HOME}/tests/util/compute_table_stats.py --impalad
 # Run compute stats over as many of the tables used in the Planner tests as possible.
 ${COMPUTE_STATS_SCRIPT} --db_names=functional\
     --table_names="alltypes,alltypesagg,alltypesaggmultifilesnopart,alltypesaggnonulls,
-    alltypessmall,alltypestiny,jointbl,dimtbl,stringpartitionkey"
+    alltypessmall,alltypestiny,jointbl,dimtbl,stringpartitionkey,nulltable"
 
 # We cannot load HBase on s3 and isilon yet.
 if [ "${TARGET_FILESYSTEM}" = "hdfs" ]; then

http://git-wip-us.apache.org/repos/asf/impala/blob/898c5158/testdata/workloads/functional-planner/queries/PlannerTest/joins.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/joins.test b/testdata/workloads/functional-planner/queries/PlannerTest/joins.test
index 4c5d7ab..b681acb 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/joins.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/joins.test
@@ -2387,10 +2387,10 @@ PLAN-ROOT SINK
 03:NESTED LOOP JOIN [INNER JOIN]
 |  predicates: t1.d IS DISTINCT FROM t2.d
 |
-|--01:SCAN HDFS [functional.nulltable t2]
+|--00:SCAN HDFS [functional.nulltable t1]
 |     partitions=1/1 files=1 size=18B
 |
-00:SCAN HDFS [functional.nulltable t1]
+01:SCAN HDFS [functional.nulltable t2]
    partitions=1/1 files=1 size=18B
 ====
 # IMPALA-3450: limits on join nodes are reflected in cardinality estimates. The test for