You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2016/07/23 05:35:23 UTC
incubator-impala git commit: IMPALA-3084: Cache the sequence of table ref and materialized tuple ids during analysis.

Repository: incubator-impala
Updated Branches:
  refs/heads/master e0fb432b8 -> 55b43ba8c


IMPALA-3084: Cache the sequence of table ref and materialized tuple ids during analysis.

The bug: For correct predicate assignment we rely on TableRef.getAllTupleIds()
and TableRef.getAllMaterializedTupleIds(). The implementation of those functions
used to traverse the chain of table refs and collect the appropriate ids.
However, during plan generation we alter the chain of table refs, in particular,
for dealing with nested collections, so those altered TableRefs do not return the
expected list of ids, leading to wrong decisions in predicate assignment.

The fix: Cache the lists of ids during analysis, so we are free to alter the
chain of TableRefs during plan generation.

Change-Id: I298b8695c9f26644a395ca9f0e86040e3f5f3846
Reviewed-on: http://gerrit.cloudera.org:8080/2415
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/55b43ba8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/55b43ba8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/55b43ba8

Branch: refs/heads/master
Commit: 55b43ba8c46c635ce94146c9606b25b286ab9f03
Parents: e0fb432
Author: Alex Behm <al...@cloudera.com>
Authored: Mon Feb 29 18:17:22 2016 -0800
Committer: Tim Armstrong <ta...@cloudera.com>
Committed: Fri Jul 22 22:35:18 2016 -0700

----------------------------------------------------------------------
 .../com/cloudera/impala/analysis/Analyzer.java  |  16 +--
 .../impala/analysis/SingularRowSrcTableRef.java |   4 +-
 .../com/cloudera/impala/analysis/TableRef.java  |  64 +++++-----
 .../cloudera/impala/planner/HdfsScanNode.java   |   6 +-
 .../impala/planner/SingleNodePlanner.java       |  34 +++--
 .../impala/analysis/AnalyzeStmtsTest.java       |   2 +-
 .../queries/PlannerTest/join-order.test         |  58 ++++-----
 .../queries/PlannerTest/nested-collections.test |  22 +++-
 .../queries/PlannerTest/tpch-nested.test        | 128 +++++++++----------
 9 files changed, 182 insertions(+), 152 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/55b43ba8/fe/src/main/java/com/cloudera/impala/analysis/Analyzer.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/com/cloudera/impala/analysis/Analyzer.java b/fe/src/main/java/com/cloudera/impala/analysis/Analyzer.java
index f96b3c5..30e1f26 100644
--- a/fe/src/main/java/com/cloudera/impala/analysis/Analyzer.java
+++ b/fe/src/main/java/com/cloudera/impala/analysis/Analyzer.java
@@ -1274,7 +1274,7 @@ public class Analyzer {
   public boolean canEvalFullOuterJoinedConjunct(Expr e, List<TupleId> tids) {
     TableRef fullOuterJoin = getFullOuterJoinRef(e);
     if (fullOuterJoin == null) return true;
-    return tids.containsAll(fullOuterJoin.getAllTupleIds());
+    return tids.containsAll(fullOuterJoin.getAllTableRefIds());
   }
 
   /**
@@ -1330,7 +1330,7 @@ public class Analyzer {
           // conjunct at the join that the On-clause belongs to.
           TableRef onClauseTableRef = globalState_.ijClauseByConjunct.get(e.getId());
           Preconditions.checkNotNull(onClauseTableRef);
-          return tupleIds.containsAll(onClauseTableRef.getAllTupleIds());
+          return tupleIds.containsAll(onClauseTableRef.getAllTableRefIds());
         }
         // If this single tid conjunct is from the On-clause of an anti-join, check if we
         // can assign it to this node.
@@ -1349,10 +1349,10 @@ public class Analyzer {
       // this is not outer-joined; ignore
       if (rhsRef == null) continue;
       // check whether the last join to outer-join 'tid' is materialized by tupleIds
-      boolean contains = tupleIds.containsAll(rhsRef.getAllTupleIds());
+      boolean contains = tupleIds.containsAll(rhsRef.getAllTableRefIds());
       LOG.trace("canEval: contains=" + (contains ? "true " : "false ")
-          + Id.printIds(tupleIds) + " " + Id.printIds(rhsRef.getAllTupleIds()));
-      if (!tupleIds.containsAll(rhsRef.getAllTupleIds())) return false;
+          + Id.printIds(tupleIds) + " " + Id.printIds(rhsRef.getAllTableRefIds()));
+      if (!tupleIds.containsAll(rhsRef.getAllTableRefIds())) return false;
     }
     return true;
   }
@@ -1367,8 +1367,8 @@ public class Analyzer {
     List<TupleId> tids = Lists.newArrayList();
     e.getIds(tids, null);
     if (tids.size() > 1) {
-      return nodeTupleIds.containsAll(antiJoinRef.getAllTupleIds())
-          && antiJoinRef.getAllTupleIds().containsAll(nodeTupleIds);
+      return nodeTupleIds.containsAll(antiJoinRef.getAllTableRefIds())
+          && antiJoinRef.getAllTableRefIds().containsAll(nodeTupleIds);
     }
     // A single tid conjunct that is anti-joined can be safely assigned to a
     // node below the anti join that specified it.
@@ -2442,7 +2442,7 @@ public class Analyzer {
   public int decrementCallDepth() { return --callDepth_; }
   public int getCallDepth() { return callDepth_; }
 
-  private boolean hasMutualValueTransfer(SlotId slotA, SlotId slotB) {
+  public boolean hasMutualValueTransfer(SlotId slotA, SlotId slotB) {
     return hasValueTransfer(slotA, slotB) && hasValueTransfer(slotB, slotA);
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/55b43ba8/fe/src/main/java/com/cloudera/impala/analysis/SingularRowSrcTableRef.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/com/cloudera/impala/analysis/SingularRowSrcTableRef.java b/fe/src/main/java/com/cloudera/impala/analysis/SingularRowSrcTableRef.java
index 0c8848e..fff5779 100644
--- a/fe/src/main/java/com/cloudera/impala/analysis/SingularRowSrcTableRef.java
+++ b/fe/src/main/java/com/cloudera/impala/analysis/SingularRowSrcTableRef.java
@@ -46,7 +46,7 @@ public class SingularRowSrcTableRef extends TableRef {
     super.setLeftTblRef(leftTblRef);
     tblRefIds_.clear();
     tupleIds_.clear();
-    tblRefIds_.addAll(leftTblRef_.getAllTupleIds());
+    tblRefIds_.addAll(leftTblRef_.getAllTableRefIds());
     tupleIds_.addAll(leftTblRef_.getAllMaterializedTupleIds());
   }
 
@@ -54,7 +54,7 @@ public class SingularRowSrcTableRef extends TableRef {
   public TupleId getId() { return tblRefIds_.get(tblRefIds_.size() - 1); }
 
   @Override
-  public List<TupleId> getAllTupleIds() { return tblRefIds_; }
+  public List<TupleId> getAllTableRefIds() { return tblRefIds_; }
 
   @Override
   public List<TupleId> getAllMaterializedTupleIds() { return tupleIds_; }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/55b43ba8/fe/src/main/java/com/cloudera/impala/analysis/TableRef.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/com/cloudera/impala/analysis/TableRef.java b/fe/src/main/java/com/cloudera/impala/analysis/TableRef.java
index 6f9ac85..cfb8c6e 100644
--- a/fe/src/main/java/com/cloudera/impala/analysis/TableRef.java
+++ b/fe/src/main/java/com/cloudera/impala/analysis/TableRef.java
@@ -108,8 +108,13 @@ public class TableRef implements ParseNode {
   // at the end of analyze() call.
   protected boolean isAnalyzed_;
 
-  // all (logical) TupleIds referenced in the On clause
-  protected List<TupleId> onClauseTupleIds_ = Lists.newArrayList();
+  // Lists of table ref ids and materialized tuple ids of the full sequence of table
+  // refs up to and including this one. These ids are cached during analysis because
+  // we may alter the chain of table refs during plan generation, but we still rely
+  // on the original list of ids for correct predicate assignment.
+  // Populated in analyzeJoin().
+  protected List<TupleId> allTableRefIds_ = Lists.newArrayList();
+  protected List<TupleId> allMaterializedTupleIds_ = Lists.newArrayList();
 
   // All physical tuple ids that this table ref is correlated with:
   // Tuple ids of root descriptors from outer query blocks that this table ref
@@ -160,7 +165,8 @@ public class TableRef implements ParseNode {
     // table refs is the responsibility of the statement.
     leftTblRef_ = null;
     isAnalyzed_ = other.isAnalyzed_;
-    onClauseTupleIds_ = Lists.newArrayList(other.onClauseTupleIds_);
+    allTableRefIds_ = Lists.newArrayList(other.allTableRefIds_);
+    allMaterializedTupleIds_ = Lists.newArrayList(other.allMaterializedTupleIds_);
     correlatedTupleIds_ = Lists.newArrayList(other.correlatedTupleIds_);
     desc_ = other.desc_;
   }
@@ -312,7 +318,6 @@ public class TableRef implements ParseNode {
     return distrMode_ == DistributionMode.PARTITIONED;
   }
   public DistributionMode getDistributionMode() { return distrMode_; }
-  public List<TupleId> getOnClauseTupleIds() { return onClauseTupleIds_; }
   public List<TupleId> getCorrelatedTupleIds() { return correlatedTupleIds_; }
   public boolean isAnalyzed() { return isAnalyzed_; }
   public boolean isResolved() { return !getClass().equals(TableRef.class); }
@@ -345,32 +350,21 @@ public class TableRef implements ParseNode {
   }
 
   /**
-   * Return the list of tuple ids materialized by the full sequence of
-   * table refs up to this one.
+   * Returns the list of tuple ids materialized by the full sequence of
+   * table refs up to and including this one.
    */
   public List<TupleId> getAllMaterializedTupleIds() {
-    if (leftTblRef_ != null) {
-      List<TupleId> result =
-          Lists.newArrayList(leftTblRef_.getAllMaterializedTupleIds());
-      result.addAll(getMaterializedTupleIds());
-      return result;
-    } else {
-      return getMaterializedTupleIds();
-    }
+    Preconditions.checkState(isAnalyzed_);
+    return allMaterializedTupleIds_;
   }
 
   /**
-   * Return the list of tuple ids of the full sequence of table refs up to this one.
+   * Return the list of table ref ids of the full sequence of table refs up to
+   * and including this one.
    */
-  public List<TupleId> getAllTupleIds() {
+  public List<TupleId> getAllTableRefIds() {
     Preconditions.checkState(isAnalyzed_);
-    if (leftTblRef_ != null) {
-      List<TupleId> result = leftTblRef_.getAllTupleIds();
-      result.add(desc_.getId());
-      return result;
-    } else {
-      return Lists.newArrayList(desc_.getId());
-    }
+    return allTableRefIds_;
   }
 
   protected void analyzeHints(Analyzer analyzer) throws AnalysisException {
@@ -437,12 +431,24 @@ public class TableRef implements ParseNode {
   }
 
   /**
-   * Analyze the join clause.
+   * Analyzes the join clause. Populates allTableRefIds_ and allMaterializedTupleIds_.
    * The join clause can only be analyzed after the left table has been analyzed
    * and the TupleDescriptor (desc) of this table has been created.
    */
   public void analyzeJoin(Analyzer analyzer) throws AnalysisException {
+    Preconditions.checkState(leftTblRef_ == null || leftTblRef_.isAnalyzed_);
     Preconditions.checkState(desc_ != null);
+
+    // Populate the lists of all table ref and materialized tuple ids.
+    allTableRefIds_.clear();
+    allMaterializedTupleIds_.clear();
+    if (leftTblRef_ != null) {
+      allTableRefIds_.addAll(leftTblRef_.getAllTableRefIds());
+      allMaterializedTupleIds_.addAll(leftTblRef_.getAllMaterializedTupleIds());
+    }
+    allTableRefIds_.add(getId());
+    allMaterializedTupleIds_.addAll(getMaterializedTupleIds());
+
     if (joinOp_ == JoinOperator.CROSS_JOIN) {
       // A CROSS JOIN is always a broadcast join, regardless of the join hints
       distrMode_ = DistributionMode.BROADCAST;
@@ -487,11 +493,11 @@ public class TableRef implements ParseNode {
     }
     if (joinOp_ == JoinOperator.RIGHT_OUTER_JOIN
         || joinOp_ == JoinOperator.FULL_OUTER_JOIN) {
-      analyzer.registerOuterJoinedTids(leftTblRef_.getAllTupleIds(), this);
+      analyzer.registerOuterJoinedTids(leftTblRef_.getAllTableRefIds(), this);
     }
     // register the tuple ids of a full outer join
     if (joinOp_ == JoinOperator.FULL_OUTER_JOIN) {
-      analyzer.registerFullOuterJoinedTids(leftTblRef_.getAllTupleIds(), this);
+      analyzer.registerFullOuterJoinedTids(leftTblRef_.getAllTableRefIds(), this);
       analyzer.registerFullOuterJoinedTids(getId().asList(), this);
     }
     // register the tuple id of the rhs of a left semi join
@@ -536,7 +542,6 @@ public class TableRef implements ParseNode {
         e.getIds(tupleIds, null);
         onClauseTupleIds.addAll(tupleIds);
       }
-      onClauseTupleIds_.addAll(onClauseTupleIds);
     } else if (!isRelative() && !isCorrelated()
         && (getJoinOp().isOuterJoin() || getJoinOp().isSemiJoin())) {
       throw new AnalysisException(
@@ -649,8 +654,9 @@ public class TableRef implements ParseNode {
       onClause_.reset();
     }
     leftTblRef_ = null;
-    onClauseTupleIds_.clear();
-    desc_ = null;
+    allTableRefIds_.clear();
+    allMaterializedTupleIds_.clear();
     correlatedTupleIds_.clear();
+    desc_ = null;
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/55b43ba8/fe/src/main/java/com/cloudera/impala/planner/HdfsScanNode.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/com/cloudera/impala/planner/HdfsScanNode.java b/fe/src/main/java/com/cloudera/impala/planner/HdfsScanNode.java
index 611bb77..6fb3208 100644
--- a/fe/src/main/java/com/cloudera/impala/planner/HdfsScanNode.java
+++ b/fe/src/main/java/com/cloudera/impala/planner/HdfsScanNode.java
@@ -94,8 +94,8 @@ public class HdfsScanNode extends ScanNode {
   // Partitions that are filtered in for scanning by the key ranges
   private final List<HdfsPartition> partitions_;
 
-  private TReplicaPreference replicaPreference_;
-  private boolean randomReplica_;
+  private final TReplicaPreference replicaPreference_;
+  private final boolean randomReplica_;
 
   // Total number of files from partitions_
   private long totalFiles_ = 0;
@@ -274,7 +274,7 @@ public class HdfsScanNode extends ScanNode {
       // predicates based on slot equivalences and enforce slot equivalences by
       // generating new predicates.
       List<Expr> collectionConjuncts =
-          analyzer.getUnassignedConjuncts(Lists.newArrayList(itemTid), false);
+          analyzer.getUnassignedConjuncts(Lists.newArrayList(itemTid));
       ArrayList<Expr> bindingPredicates = analyzer.getBoundPredicates(itemTid);
       for (Expr boundPred: bindingPredicates) {
         if (!collectionConjuncts.contains(boundPred)) collectionConjuncts.add(boundPred);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/55b43ba8/fe/src/main/java/com/cloudera/impala/planner/SingleNodePlanner.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/com/cloudera/impala/planner/SingleNodePlanner.java b/fe/src/main/java/com/cloudera/impala/planner/SingleNodePlanner.java
index 84552e0..235f3d9 100644
--- a/fe/src/main/java/com/cloudera/impala/planner/SingleNodePlanner.java
+++ b/fe/src/main/java/com/cloudera/impala/planner/SingleNodePlanner.java
@@ -756,15 +756,14 @@ public class SingleNodePlanner {
           // table ref ids to the left and including the last outer/semi join.
           // TODO: Think about when we can allow re-ordering across semi/outer joins
           // in subplans.
-          requiredTblRefIds.addAll(lastSemiOrOuterJoin.getAllTupleIds());
+          requiredTblRefIds.addAll(lastSemiOrOuterJoin.getAllTableRefIds());
         }
         if (!subplanTids.containsAll(requiredTids)) {
           isParentRef = false;
-          // For outer and semi joins, we also need to ensure that the On-clause
-          // conjuncts can be evaluated, so add those required table ref ids,
-          // excluding the id of ref itself.
+          // Outer and semi joins are placed at a fixed position in the join order.
+          // They require that all tables to their left are materialized.
           if (ref.getJoinOp().isOuterJoin() || ref.getJoinOp().isSemiJoin()) {
-            requiredTblRefIds.addAll(ref.getOnClauseTupleIds());
+            requiredTblRefIds.addAll(ref.getAllTableRefIds());
             requiredTblRefIds.remove(ref.getId());
           }
           subplanRefs.add(new SubplanRef(ref, requiredTids, requiredTblRefIds));
@@ -1391,14 +1390,23 @@ public class SingleNodePlanner {
       TableRef rhsTblRef = analyzer.getTableRef(rhsId);
       Preconditions.checkNotNull(rhsTblRef);
       for (SlotDescriptor slotDesc: rhsTblRef.getDesc().getSlots()) {
-        List<SlotId> lhsSlotIds = analyzer.getEquivSlots(slotDesc.getId(), lhsTblRefIds);
-        if (!lhsSlotIds.isEmpty()) {
-          // construct a BinaryPredicates in order to get correct casting;
-          // we only do this for one of the equivalent slots, all the other implied
-          // equalities are redundant
-          BinaryPredicate pred =
-              analyzer.createInferredEqPred(lhsSlotIds.get(0), slotDesc.getId());
-          result.add(pred);
+        SlotId rhsSid = slotDesc.getId();
+        // List of slots that participate in a value transfer with rhsSid and belong
+        // to a tuple in lhsTblRefIds. The value transfer is not necessarily mutual.
+        List<SlotId> lhsSlotIds = analyzer.getEquivSlots(rhsSid, lhsTblRefIds);
+        for (SlotId lhsSid: lhsSlotIds) {
+          // A mutual value transfer between lhsSid and rhsSid is required for correctly
+          // generating an inferred predicate. Otherwise, the predicate might incorrectly
+          // eliminate rows that would have been non-matches of an outer or anti join.
+          if (analyzer.hasMutualValueTransfer(lhsSid, rhsSid)) {
+            // construct a BinaryPredicates in order to get correct casting;
+            // we only do this for one of the equivalent slots, all the other implied
+            // equalities are redundant
+            BinaryPredicate pred =
+                analyzer.createInferredEqPred(lhsSid, rhsSid);
+            result.add(pred);
+            break;
+          }
         }
       }
     }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/55b43ba8/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeStmtsTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeStmtsTest.java b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeStmtsTest.java
index faa1834..2c6b9bf 100644
--- a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeStmtsTest.java
+++ b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeStmtsTest.java
@@ -3443,7 +3443,7 @@ public class AnalyzeStmtsTest extends AnalyzerTest {
     testNumberOfMembers(ValuesStmt.class, 0);
 
     // Also check TableRefs.
-    testNumberOfMembers(TableRef.class, 17);
+    testNumberOfMembers(TableRef.class, 18);
     testNumberOfMembers(BaseTableRef.class, 0);
     testNumberOfMembers(InlineViewRef.class, 8);
   }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/55b43ba8/testdata/workloads/functional-planner/queries/PlannerTest/join-order.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/join-order.test b/testdata/workloads/functional-planner/queries/PlannerTest/join-order.test
index 441afca..d43b81f 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/join-order.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/join-order.test
@@ -792,19 +792,19 @@ inner join functional.alltypestiny t6 on (t5.id = t6.id)
 |  output: count(*)
 |
 10:HASH JOIN [INNER JOIN]
-|  hash predicates: t5.id = t6.id
-|  runtime filters: RF000 <- t6.id
-|
-|--05:SCAN HDFS [functional.alltypestiny t6]
-|     partitions=4/4 files=4 size=460B
-|
-09:HASH JOIN [INNER JOIN]
 |  hash predicates: t4.id = t5.id
-|  runtime filters: RF001 <- t5.id
+|  runtime filters: RF000 <- t5.id
 |
 |--04:SCAN HDFS [functional.alltypes t5]
 |     partitions=24/24 files=24 size=478.45KB
-|     runtime filters: RF000 -> t5.id
+|
+09:HASH JOIN [INNER JOIN]
+|  hash predicates: t4.id = t6.id
+|  runtime filters: RF001 <- t6.id
+|
+|--05:SCAN HDFS [functional.alltypestiny t6]
+|     partitions=4/4 files=4 size=460B
+|     runtime filters: RF000 -> t6.id
 |
 08:HASH JOIN [LEFT OUTER JOIN]
 |  hash predicates: t4.id = t3.id
@@ -911,20 +911,20 @@ inner join functional.alltypestiny t6 on (t3.id = t6.id)
 |  runtime filters: RF001 <- t4.id
 |
 |--09:HASH JOIN [INNER JOIN]
-|  |  hash predicates: t3.id = t4.id
-|  |  runtime filters: RF002 <- t4.id
-|  |
-|  |--03:SCAN HDFS [functional.alltypessmall t4]
-|  |     partitions=4/4 files=4 size=6.32KB
-|  |     runtime filters: RF000 -> t4.id
-|  |
-|  08:HASH JOIN [INNER JOIN]
 |  |  hash predicates: t2.id = t3.id
-|  |  runtime filters: RF003 <- t3.id
+|  |  runtime filters: RF002 <- t3.id
 |  |
 |  |--02:SCAN HDFS [functional.alltypesagg t3]
 |  |     partitions=11/11 files=11 size=814.73KB
-|  |     runtime filters: RF000 -> t3.id, RF002 -> t3.id
+|  |     runtime filters: RF000 -> t3.id
+|  |
+|  08:HASH JOIN [INNER JOIN]
+|  |  hash predicates: t2.id = t4.id
+|  |  runtime filters: RF003 <- t4.id
+|  |
+|  |--03:SCAN HDFS [functional.alltypessmall t4]
+|  |     partitions=4/4 files=4 size=6.32KB
+|  |     runtime filters: RF000 -> t4.id, RF002 -> t4.id
 |  |
 |  07:HASH JOIN [RIGHT OUTER JOIN]
 |  |  hash predicates: t2.id = t1.id
@@ -964,20 +964,20 @@ inner join functional.alltypestiny t6 on (t3.id = t6.id)
 |  hash predicates: t5.id = t4.id
 |
 |--09:HASH JOIN [INNER JOIN]
-|  |  hash predicates: t3.id = t4.id
-|  |  runtime filters: RF001 <- t4.id
-|  |
-|  |--03:SCAN HDFS [functional.alltypessmall t4]
-|  |     partitions=4/4 files=4 size=6.32KB
-|  |     runtime filters: RF000 -> t4.id
-|  |
-|  08:HASH JOIN [INNER JOIN]
 |  |  hash predicates: t2.id = t3.id
-|  |  runtime filters: RF002 <- t3.id
+|  |  runtime filters: RF001 <- t3.id
 |  |
 |  |--02:SCAN HDFS [functional.alltypesagg t3]
 |  |     partitions=11/11 files=11 size=814.73KB
-|  |     runtime filters: RF000 -> t3.id, RF001 -> t3.id
+|  |     runtime filters: RF000 -> t3.id
+|  |
+|  08:HASH JOIN [INNER JOIN]
+|  |  hash predicates: t2.id = t4.id
+|  |  runtime filters: RF002 <- t4.id
+|  |
+|  |--03:SCAN HDFS [functional.alltypessmall t4]
+|  |     partitions=4/4 files=4 size=6.32KB
+|  |     runtime filters: RF000 -> t4.id, RF001 -> t4.id
 |  |
 |  07:HASH JOIN [RIGHT OUTER JOIN]
 |  |  hash predicates: t2.id = t1.id

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/55b43ba8/testdata/workloads/functional-planner/queries/PlannerTest/nested-collections.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/nested-collections.test b/testdata/workloads/functional-planner/queries/PlannerTest/nested-collections.test
index ee0bce4..a8cbc3c 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/nested-collections.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/nested-collections.test
@@ -663,7 +663,6 @@ inner join functional.alltypes d on (b.id = d.id)
 |
 10:HASH JOIN [RIGHT ANTI JOIN]
 |  hash predicates: b.int_col = c.int_col
-|  other join predicates: b.int_col = c.int_col
 |
 |--02:SCAN HDFS [functional.alltypessmall c]
 |     partitions=4/4 files=4 size=6.32KB
@@ -747,7 +746,6 @@ where b.item < 10 and c.int_col > 30
 ---- PLAN
 12:HASH JOIN [RIGHT ANTI JOIN]
 |  hash predicates: d.value = e.id
-|  other join predicates: (d.value = e.id)
 |
 |--06:SCAN HDFS [functional.alltypestiny e]
 |     partitions=4/4 files=4 size=460B
@@ -1772,4 +1770,22 @@ left semi join c2.c_orders o2
 |
 00:SCAN HDFS [tpch_nested_parquet.customer c1]
    partitions=1/1 files=4 size=577.87MB
-====
\ No newline at end of file
+====
+# IMPALA-3084: Test correct assignment of NULL checking predicates
+# referencing outer-joined nested collections.
+select * from tpch_nested_parquet.customer c
+left outer join c.c_orders o
+where o.o_orderkey is null and o.o_orderstatus <=> o_orderpriority
+---- PLAN
+01:SUBPLAN
+|
+|--04:NESTED LOOP JOIN [RIGHT OUTER JOIN]
+|  |  predicates: o.o_orderkey IS NULL, o.o_orderstatus IS NOT DISTINCT FROM o_orderpriority
+|  |
+|  |--02:SINGULAR ROW SRC
+|  |
+|  03:UNNEST [c.c_orders o]
+|
+00:SCAN HDFS [tpch_nested_parquet.customer c]
+   partitions=1/1 files=4 size=554.13MB
+====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/55b43ba8/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test
index 34bc270..6e582d5 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test
@@ -316,7 +316,7 @@ limit 10
 |  03:UNNEST [c.c_orders o]
 |
 00:SCAN HDFS [tpch_nested_parquet.customer c]
-   partitions=1/1 files=4 size=554.13MB
+   partitions=1/1 files=4 size=577.87MB
    predicates: c_mktsegment = 'BUILDING', !empty(c.c_orders)
    predicates on o: !empty(o.o_lineitems), o_orderdate < '1995-03-15'
    predicates on l: l_shipdate > '1995-03-15'
@@ -355,7 +355,7 @@ limit 10
 |  03:UNNEST [c.c_orders o]
 |
 00:SCAN HDFS [tpch_nested_parquet.customer c]
-   partitions=1/1 files=4 size=554.13MB
+   partitions=1/1 files=4 size=577.87MB
    predicates: c_mktsegment = 'BUILDING', !empty(c.c_orders)
    predicates on o: !empty(o.o_lineitems), o_orderdate < '1995-03-15'
    predicates on l: l_shipdate > '1995-03-15'
@@ -393,17 +393,17 @@ order by
 |
 01:SUBPLAN
 |
-|--08:NESTED LOOP JOIN [CROSS JOIN]
-|  |
-|  |--02:SINGULAR ROW SRC
+|--08:SUBPLAN
 |  |
-|  04:SUBPLAN
-|  |
-|  |--07:NESTED LOOP JOIN [RIGHT SEMI JOIN]
+|  |--06:NESTED LOOP JOIN [RIGHT SEMI JOIN]
 |  |  |
-|  |  |--05:SINGULAR ROW SRC
+|  |  |--04:SINGULAR ROW SRC
 |  |  |
-|  |  06:UNNEST [o.o_lineitems]
+|  |  05:UNNEST [o.o_lineitems]
+|  |
+|  07:NESTED LOOP JOIN [CROSS JOIN]
+|  |
+|  |--02:SINGULAR ROW SRC
 |  |
 |  03:UNNEST [c.c_orders o]
 |
@@ -431,17 +431,17 @@ order by
 |
 01:SUBPLAN
 |
-|--08:NESTED LOOP JOIN [CROSS JOIN]
-|  |
-|  |--02:SINGULAR ROW SRC
-|  |
-|  04:SUBPLAN
+|--08:SUBPLAN
 |  |
-|  |--07:NESTED LOOP JOIN [RIGHT SEMI JOIN]
+|  |--06:NESTED LOOP JOIN [RIGHT SEMI JOIN]
 |  |  |
-|  |  |--05:SINGULAR ROW SRC
+|  |  |--04:SINGULAR ROW SRC
 |  |  |
-|  |  06:UNNEST [o.o_lineitems]
+|  |  05:UNNEST [o.o_lineitems]
+|  |
+|  07:NESTED LOOP JOIN [CROSS JOIN]
+|  |
+|  |--02:SINGULAR ROW SRC
 |  |
 |  03:UNNEST [c.c_orders o]
 |
@@ -610,7 +610,7 @@ where
 |  output: sum(l_extendedprice * l_discount)
 |
 00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems]
-   partitions=1/1 files=4 size=554.13MB
+   partitions=1/1 files=4 size=577.87MB
    predicates: l_discount >= 0.05, l_discount <= 0.07, l_quantity < 24, l_shipdate >= '1994-01-01', l_shipdate < '1995-01-01'
 ---- DISTRIBUTEDPLAN
 03:AGGREGATE [FINALIZE]
@@ -622,7 +622,7 @@ where
 |  output: sum(l_extendedprice * l_discount)
 |
 00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems]
-   partitions=1/1 files=4 size=554.13MB
+   partitions=1/1 files=4 size=577.87MB
    predicates: l_discount >= 0.05, l_discount <= 0.07, l_quantity < 24, l_shipdate >= '1994-01-01', l_shipdate < '1995-01-01'
 ====
 # TPCH-Q7
@@ -2279,19 +2279,24 @@ limit 100
 |--16:NESTED LOOP JOIN [RIGHT ANTI JOIN]
 |  |  join predicates: l3.l_suppkey != l1.l_suppkey
 |  |
-|  |--14:SINGULAR ROW SRC
+|  |--15:NESTED LOOP JOIN [RIGHT SEMI JOIN]
+|  |  |  join predicates: l2.l_suppkey != l1.l_suppkey
+|  |  |
+|  |  |--12:SINGULAR ROW SRC
+|  |  |
+|  |  13:UNNEST [o.o_lineitems l2]
 |  |
-|  15:UNNEST [o.o_lineitems l3]
+|  14:UNNEST [o.o_lineitems l3]
 |
 17:HASH JOIN [INNER JOIN]
 |  hash predicates: s_nationkey = n_nationkey
 |  runtime filters: RF000 <- n_nationkey
 |
-|--12:SCAN HDFS [tpch_nested_parquet.region.r_nations n]
+|--10:SCAN HDFS [tpch_nested_parquet.region.r_nations n]
 |     partitions=1/1 files=1 size=4.18KB
 |     predicates: n_name = 'SAUDI ARABIA'
 |
-13:HASH JOIN [INNER JOIN]
+11:HASH JOIN [INNER JOIN]
 |  hash predicates: l1.l_suppkey = s_suppkey
 |
 |--00:SCAN HDFS [tpch_nested_parquet.supplier s]
@@ -2300,18 +2305,13 @@ limit 100
 |
 02:SUBPLAN
 |
-|--11:NESTED LOOP JOIN [CROSS JOIN]
+|--09:NESTED LOOP JOIN [CROSS JOIN]
 |  |
 |  |--03:SINGULAR ROW SRC
 |  |
 |  05:SUBPLAN
 |  |
-|  |--10:NESTED LOOP JOIN [LEFT SEMI JOIN]
-|  |  |  join predicates: l2.l_suppkey != l1.l_suppkey
-|  |  |
-|  |  |--08:UNNEST [o.o_lineitems l2]
-|  |  |
-|  |  09:NESTED LOOP JOIN [CROSS JOIN]
+|  |--08:NESTED LOOP JOIN [CROSS JOIN]
 |  |  |
 |  |  |--06:SINGULAR ROW SRC
 |  |  |
@@ -2348,9 +2348,14 @@ limit 100
 |--16:NESTED LOOP JOIN [RIGHT ANTI JOIN]
 |  |  join predicates: l3.l_suppkey != l1.l_suppkey
 |  |
-|  |--14:SINGULAR ROW SRC
+|  |--15:NESTED LOOP JOIN [RIGHT SEMI JOIN]
+|  |  |  join predicates: l2.l_suppkey != l1.l_suppkey
+|  |  |
+|  |  |--12:SINGULAR ROW SRC
+|  |  |
+|  |  13:UNNEST [o.o_lineitems l2]
 |  |
-|  15:UNNEST [o.o_lineitems l3]
+|  14:UNNEST [o.o_lineitems l3]
 |
 17:HASH JOIN [INNER JOIN, BROADCAST]
 |  hash predicates: s_nationkey = n_nationkey
@@ -2358,11 +2363,11 @@ limit 100
 |
 |--22:EXCHANGE [BROADCAST]
 |  |
-|  12:SCAN HDFS [tpch_nested_parquet.region.r_nations n]
+|  10:SCAN HDFS [tpch_nested_parquet.region.r_nations n]
 |     partitions=1/1 files=1 size=4.18KB
 |     predicates: n_name = 'SAUDI ARABIA'
 |
-13:HASH JOIN [INNER JOIN, BROADCAST]
+11:HASH JOIN [INNER JOIN, BROADCAST]
 |  hash predicates: l1.l_suppkey = s_suppkey
 |
 |--21:EXCHANGE [BROADCAST]
@@ -2373,18 +2378,13 @@ limit 100
 |
 02:SUBPLAN
 |
-|--11:NESTED LOOP JOIN [CROSS JOIN]
+|--09:NESTED LOOP JOIN [CROSS JOIN]
 |  |
 |  |--03:SINGULAR ROW SRC
 |  |
 |  05:SUBPLAN
 |  |
-|  |--10:NESTED LOOP JOIN [LEFT SEMI JOIN]
-|  |  |  join predicates: l2.l_suppkey != l1.l_suppkey
-|  |  |
-|  |  |--08:UNNEST [o.o_lineitems l2]
-|  |  |
-|  |  09:NESTED LOOP JOIN [CROSS JOIN]
+|  |--08:NESTED LOOP JOIN [CROSS JOIN]
 |  |  |
 |  |  |--06:SINGULAR ROW SRC
 |  |  |
@@ -2441,24 +2441,24 @@ order by
 |  output: count(*), sum(c_acctbal)
 |  group by: substr(c_phone, 1, 2)
 |
-07:NESTED LOOP JOIN [INNER JOIN]
+07:SUBPLAN
+|
+|--05:NESTED LOOP JOIN [RIGHT ANTI JOIN]
+|  |
+|  |--03:SINGULAR ROW SRC
+|  |
+|  04:UNNEST [c.c_orders]
+|
+06:NESTED LOOP JOIN [INNER JOIN]
 |  predicates: c_acctbal > avg(c_acctbal)
 |
-|--06:AGGREGATE [FINALIZE]
+|--02:AGGREGATE [FINALIZE]
 |  |  output: avg(c_acctbal)
 |  |
-|  05:SCAN HDFS [tpch_nested_parquet.customer c]
+|  01:SCAN HDFS [tpch_nested_parquet.customer c]
 |     partitions=1/1 files=4 size=577.87MB
 |     predicates: c_acctbal > 0.00, substr(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17')
 |
-01:SUBPLAN
-|
-|--04:NESTED LOOP JOIN [RIGHT ANTI JOIN]
-|  |
-|  |--02:SINGULAR ROW SRC
-|  |
-|  03:UNNEST [c.c_orders]
-|
 00:SCAN HDFS [tpch_nested_parquet.customer c]
    partitions=1/1 files=4 size=577.87MB
    predicates: substr(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17')
@@ -2479,7 +2479,15 @@ order by
 |  output: count(*), sum(c_acctbal)
 |  group by: substr(c_phone, 1, 2)
 |
-07:NESTED LOOP JOIN [INNER JOIN, BROADCAST]
+07:SUBPLAN
+|
+|--05:NESTED LOOP JOIN [RIGHT ANTI JOIN]
+|  |
+|  |--03:SINGULAR ROW SRC
+|  |
+|  04:UNNEST [c.c_orders]
+|
+06:NESTED LOOP JOIN [INNER JOIN, BROADCAST]
 |  predicates: c_acctbal > avg(c_acctbal)
 |
 |--12:EXCHANGE [BROADCAST]
@@ -2489,21 +2497,13 @@ order by
 |  |
 |  10:EXCHANGE [UNPARTITIONED]
 |  |
-|  06:AGGREGATE
+|  02:AGGREGATE
 |  |  output: avg(c_acctbal)
 |  |
-|  05:SCAN HDFS [tpch_nested_parquet.customer c]
+|  01:SCAN HDFS [tpch_nested_parquet.customer c]
 |     partitions=1/1 files=4 size=577.87MB
 |     predicates: c_acctbal > 0.00, substr(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17')
 |
-01:SUBPLAN
-|
-|--04:NESTED LOOP JOIN [RIGHT ANTI JOIN]
-|  |
-|  |--02:SINGULAR ROW SRC
-|  |
-|  03:UNNEST [c.c_orders]
-|
 00:SCAN HDFS [tpch_nested_parquet.customer c]
    partitions=1/1 files=4 size=577.87MB
    predicates: substr(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17')