You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by dh...@apache.org on 2017/05/19 16:22:05 UTC

[2/2] incubator-impala git commit: IMPALA-5180: Don't use non-deterministic exprs in partition pruning

IMPALA-5180: Don't use non-deterministic exprs in partition pruning

Non-deterministic exprs which evaluate as constant should not be
used during HDFS partition pruning.  We consider Exprs which have no
SlotRefs as bound by default, and thus we end up trying to apply
them indiscriminately.  Constant propagation makes this situation
easier to run into and the behavior is rather unexpected.

The fix for now is to explicitly disallow non-deterministic Exprs
in partition pruning.

Change-Id: I91054c6bf017401242259a1eff5e859085285546
Reviewed-on: http://gerrit.cloudera.org:8080/6575
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/d6e612f5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/d6e612f5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/d6e612f5

Branch: refs/heads/master
Commit: d6e612f5c7c800bce98dc35b2e04dd893d1b6f86
Parents: 6226e59
Author: Zach Amsden <za...@cloudera.com>
Authored: Thu Apr 6 23:41:11 2017 +0000
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Fri May 19 08:50:25 2017 +0000

----------------------------------------------------------------------
 .../apache/impala/analysis/PartitionSet.java    |  2 +-
 .../impala/planner/HdfsPartitionPruner.java     |  3 +-
 .../org/apache/impala/planner/HdfsScanNode.java |  1 +
 .../queries/PlannerTest/hdfs.test               | 42 ++++++++++++++++----
 4 files changed, 39 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d6e612f5/fe/src/main/java/org/apache/impala/analysis/PartitionSet.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/PartitionSet.java b/fe/src/main/java/org/apache/impala/analysis/PartitionSet.java
index d5f0e70..3ff4ed7 100644
--- a/fe/src/main/java/org/apache/impala/analysis/PartitionSet.java
+++ b/fe/src/main/java/org/apache/impala/analysis/PartitionSet.java
@@ -75,7 +75,7 @@ public class PartitionSet extends PartitionSpecBase {
       }
 
       // Make sure every conjunct only contains partition slot refs.
-      if(!e.isBoundBySlotIds(partitionSlots)) {
+      if (!e.isBoundBySlotIds(partitionSlots)) {
         throw new AnalysisException("Partition exprs cannot contain non-partition " +
             "column(s): " + e.toSql() + ".");
       }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d6e612f5/fe/src/main/java/org/apache/impala/planner/HdfsPartitionPruner.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/HdfsPartitionPruner.java b/fe/src/main/java/org/apache/impala/planner/HdfsPartitionPruner.java
index cc27528..95ef68c 100644
--- a/fe/src/main/java/org/apache/impala/planner/HdfsPartitionPruner.java
+++ b/fe/src/main/java/org/apache/impala/planner/HdfsPartitionPruner.java
@@ -108,7 +108,8 @@ public class HdfsPartitionPruner {
     Iterator<Expr> it = conjuncts.iterator();
     while (it.hasNext()) {
       Expr conjunct = it.next();
-      if (conjunct.isBoundBySlotIds(partitionSlots_)) {
+      if (conjunct.isBoundBySlotIds(partitionSlots_) &&
+          !conjunct.contains(Expr.IS_NONDETERMINISTIC_BUILTIN_FN_PREDICATE)) {
         // Check if the conjunct can be evaluated from the partition metadata.
         // Use a cloned conjunct to rewrite BetweenPredicates and allow
         // canEvalUsingPartitionMd() to fold constant expressions without modifying

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d6e612f5/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
index 296dfb4..a3286b4 100644
--- a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
@@ -485,6 +485,7 @@ public class HdfsScanNode extends ScanNode {
       List<SlotId> slotIds = Lists.newArrayList();
 
       conjunct.getIds(tupleIds, slotIds);
+      if (slotIds.size() == 0) continue;
       Preconditions.checkState(tupleIds.size() == 1);
       if (slotIds.size() != 1) continue;
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d6e612f5/testdata/workloads/functional-planner/queries/PlannerTest/hdfs.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/hdfs.test b/testdata/workloads/functional-planner/queries/PlannerTest/hdfs.test
index 5ed6f6b..9b9a678 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/hdfs.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/hdfs.test
@@ -1016,14 +1016,42 @@ PLAN-ROOT SINK
 00:SCAN HDFS [functional.alltypes]
    partitions=0/24 files=0 size=0B
 ====
-# IMPALA-4592: Test that we bail on evaluating non-deterministic predicates when trying
-# to prune partitions.
-select * from functional.alltypes where rand() > 100
+# IMPALA-4592: Test random predicates that reference a partition column.
+select * from functional.alltypes where rand() > year
 ---- PLAN
-not implemented: Unsupported non-deterministic predicate: rand() > 100
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [functional.alltypes]
+   partitions=24/24 files=24 size=478.45KB
+   predicates: year < rand()
 ====
-# IMPALA-4592: Same as above but the predicate references a partition column.
-select * from functional.alltypes where rand() > year
+# IMPALA-5180: Test that predicates not touching a partition column are ignored in
+# partition pruning
+select count(*) from
+  (select * from functional.alltypes) T
+where random() = 100
+---- PLAN
+PLAN-ROOT SINK
+|
+01:AGGREGATE [FINALIZE]
+|  output: count(*)
+|
+00:SCAN HDFS [functional.alltypes]
+   partitions=24/24 files=24 size=478.45KB
+   predicates: random() = 100
+====
+# Same as above, with a column that gets constant folded away
+select count(*) from
+  (select * from functional.alltypes) T
+where (coalesce(NULL, T.int_col) + random() * T.int_col = 100 OR
+      coalesce(NULL, T.int_col) + T.int_col = 20) and T.int_col = 1
 ---- PLAN
-not implemented: Unsupported non-deterministic predicate: year < rand()
+PLAN-ROOT SINK
+|
+01:AGGREGATE [FINALIZE]
+|  output: count(*)
+|
+00:SCAN HDFS [functional.alltypes]
+   partitions=24/24 files=24 size=478.45KB
+   predicates: functional.alltypes.int_col = 1, FALSE OR 1 + random() * 1 = 100
 ====