You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2016/12/15 23:00:49 UTC

[23/50] [abbrv] incubator-impala git commit: IMPALA-3126: Conservative assignment of inner-join On-clause predicates.

IMPALA-3126: Conservative assignment of inner-join On-clause predicates.

Implements the following conservative but correct policy for assigning
predicates from the On-clause of an inner join:
If the predicate references an outer-joined tuple, then evaluate it at
the inner join that the On-clause belongs to.

Cleans up Analyzer.canEvalPredicate().

Change-Id: Idf45323ed9102ffb45c9d94a130ea3692286f215
Reviewed-on: http://gerrit.cloudera.org:8080/4982
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/80f85179
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/80f85179
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/80f85179

Branch: refs/heads/hadoop-next
Commit: 80f85179f99ff36d6ecad65b2041b45015ffb716
Parents: cc57a22
Author: Alex Behm <al...@cloudera.com>
Authored: Mon Nov 7 14:15:45 2016 -0800
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Fri Dec 9 02:12:46 2016 +0000

----------------------------------------------------------------------
 .../org/apache/impala/analysis/Analyzer.java    | 101 ++++++++++---------
 .../queries/PlannerTest/outer-joins.test        |  72 ++++++++++++-
 2 files changed, 121 insertions(+), 52 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/80f85179/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
index 1e88862..61d1c20 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
@@ -1195,6 +1195,10 @@ public class Analyzer {
     return globalState_.ijClauseByConjunct.containsKey(e.getId());
   }
 
+  public boolean isSjConjunct(Expr e) {
+    return globalState_.sjClauseByConjunct.containsKey(e.getId());
+  }
+
   public TableRef getFullOuterJoinRef(Expr e) {
     return globalState_.fullOuterJoinedConjuncts.get(e.getId());
   }
@@ -1353,12 +1357,24 @@ public class Analyzer {
   /**
    * Returns true if predicate 'e' can be correctly evaluated by a tree materializing
    * 'tupleIds', otherwise false:
-   * - the predicate needs to be bound by tupleIds
-   * - an On clause predicate against the non-nullable side of an Outer Join clause
-   *   can only be correctly evaluated by the join node that materializes the
-   *   Outer Join clause
-   * - otherwise, a predicate can only be correctly evaluated if for all outer-joined
-   *   referenced tids the last join to outer-join this tid has been materialized
+   * - The predicate needs to be bound by tupleIds.
+   * - For On-clause predicates:
+   *   - If the predicate is from an anti-join On-clause it must be evaluated by the
+   *     corresponding anti-join node.
+   *   - Predicates from the On-clause of an inner or semi join are evaluated at the
+   *     node that materializes the required tuple ids, unless they reference outer
+   *     joined tuple ids. In that case, the predicates are evaluated at the join node
+   *     of the corresponding On-clause.
+   *   - Predicates referencing full-outer joined tuples are assigned at the originating
+   *     join if it is a full-outer join, otherwise at the last full-outer join that does
+   *     not materialize the table ref ids of the originating join.
+   *   - Predicates from the On-clause of a left/right outer join are assigned at
+   *     the corresponding outer join node with the exception of simple predicates
+   *     that only reference a single tuple id. Those may be assigned below the
+   *     outer join node if they are from the same On-clause that makes the tuple id
+   *     nullable.
+   * - Otherwise, a predicate can only be correctly evaluated if for all outer-joined
+   *   referenced tids the last join to outer-join this tid has been materialized.
    */
   public boolean canEvalPredicate(List<TupleId> tupleIds, Expr e) {
     if (!e.isBoundByTupleIds(tupleIds)) return false;
@@ -1367,58 +1383,43 @@ public class Analyzer {
     if (tids.isEmpty()) return true;
 
     if (e.isOnClauseConjunct()) {
-      if (tids.size() > 1) {
-        // If the conjunct is from the ON-clause of an anti join, check if we can
-        // assign it to this node.
-        if (isAntiJoinedConjunct(e)) return canEvalAntiJoinedConjunct(e, tupleIds);
-        // bail if this is from an OJ On clause; the join node will pick
-        // it up later via getUnassignedOjConjuncts()
-        if (globalState_.ojClauseByConjunct.containsKey(e.getId())) return false;
-        // If this is not from an OJ On clause (e.g. where clause or On clause of an
-        // inner join) and is full-outer joined, we need to make sure it is not
-        // assigned below the full outer join node that outer-joined it.
-        return canEvalFullOuterJoinedConjunct(e, tupleIds);
+      if (isAntiJoinedConjunct(e)) return canEvalAntiJoinedConjunct(e, tupleIds);
+
+      if (isIjConjunct(e) || isSjConjunct(e)) {
+        if (!containsOuterJoinedTid(tids)) return true;
+        // If the predicate references an outer-joined tuple, then evaluate it at
+        // the join that the On-clause belongs to.
+        TableRef onClauseTableRef = null;
+        if (isIjConjunct(e)) {
+          onClauseTableRef = globalState_.ijClauseByConjunct.get(e.getId());
+        } else {
+          onClauseTableRef = globalState_.sjClauseByConjunct.get(e.getId());
+        }
+        Preconditions.checkNotNull(onClauseTableRef);
+        return tupleIds.containsAll(onClauseTableRef.getAllTableRefIds());
       }
 
-      TupleId tid = tids.get(0);
-      if (globalState_.ojClauseByConjunct.containsKey(e.getId())) {
-        // OJ On-clause predicate: okay if it's from
-        // the same On clause that makes tid nullable
-        // (otherwise e needn't be true when that tuple is set)
-        if (!globalState_.outerJoinedTupleIds.containsKey(tid)) return false;
-        if (globalState_.ojClauseByConjunct.get(e.getId())
-            != globalState_.outerJoinedTupleIds.get(tid)) {
-          return false;
-        }
-        // Single tuple id conjuncts specified in the FOJ On-clause are not allowed to be
-        // assigned below that full outer join in the operator tree.
-        TableRef tblRef = globalState_.ojClauseByConjunct.get(e.getId());
-        if (tblRef.getJoinOp().isFullOuterJoin()) return false;
-      } else {
-        // Non-OJ On-clause conjunct.
-        if (isOuterJoined(tid)) {
-          // If the conjunct references an outer-joined tuple, then evaluate the
-          // conjunct at the join that the On-clause belongs to.
-          TableRef onClauseTableRef = globalState_.ijClauseByConjunct.get(e.getId());
-          Preconditions.checkNotNull(onClauseTableRef);
-          return tupleIds.containsAll(onClauseTableRef.getAllTableRefIds());
-        }
-        // If this single tid conjunct is from the On-clause of an anti-join, check if we
-        // can assign it to this node.
-        if (isAntiJoinedConjunct(e)) return canEvalAntiJoinedConjunct(e, tupleIds);
+      if (isFullOuterJoined(e)) return canEvalFullOuterJoinedConjunct(e, tupleIds);
+      if (isOjConjunct(e)) {
+        // Force this predicate to be evaluated by the corresponding outer join node.
+        // The join node will pick up the predicate later via getUnassignedOjConjuncts().
+        if (tids.size() > 1) return false;
+        // Optimization for single-tid predicates: Legal to assign below the outer join
+        // if the predicate is from the same On-clause that makes tid nullable
+        // (otherwise e needn't be true when that tuple is set).
+        TupleId tid = tids.get(0);
+        return globalState_.ojClauseByConjunct.get(e.getId()) == getLastOjClause(tid);
       }
-      // Single tid predicate that is not from an OJ On-clause and is outer-joined by a
-      // full outer join cannot be assigned below that full outer join in the
-      // operator tree.
-      return canEvalFullOuterJoinedConjunct(e, tupleIds);
+
+      // Should have returned in one of the cases above.
+      Preconditions.checkState(false);
     }
-    if (isAntiJoinedConjunct(e)) return canEvalAntiJoinedConjunct(e, tupleIds);
 
     for (TupleId tid: tids) {
       TableRef rhsRef = getLastOjClause(tid);
-      // this is not outer-joined; ignore
+      // Ignore 'tid' because it is not outer-joined.
       if (rhsRef == null) continue;
-      // check whether the last join to outer-join 'tid' is materialized by tupleIds
+      // Check whether the last join to outer-join 'tid' is materialized by tupleIds.
       if (!tupleIds.containsAll(rhsRef.getAllTableRefIds())) return false;
     }
     return true;

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/80f85179/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test b/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test
index 95d16f8..5b82c2d 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test
@@ -582,7 +582,7 @@ PLAN-ROOT SINK
 |
 05:HASH JOIN [INNER JOIN]
 |  hash predicates: b.smallint_col = c.smallint_col
-|  other predicates: b.id < 10
+|  other predicates: a.int_col < b.int_col, b.id < 10
 |  runtime filters: RF000 <- c.smallint_col
 |
 |--02:SCAN HDFS [functional.alltypes c]
@@ -590,7 +590,6 @@ PLAN-ROOT SINK
 |
 04:HASH JOIN [FULL OUTER JOIN]
 |  hash predicates: a.id = b.id
-|  other predicates: a.int_col < b.int_col
 |
 |--01:SCAN HDFS [functional.alltypes b]
 |     partitions=24/24 files=24 size=478.45KB
@@ -948,3 +947,72 @@ PLAN-ROOT SINK
 00:SCAN HDFS [functional.alltypes t1]
    partitions=24/24 files=24 size=478.45KB
 ====
+# IMPALA-3126: Test assignment of an inner join On-clause predicate. The predicate
+# may not be assigned below the join materializing 'd'.
+select 1 from functional.alltypes a
+left outer join functional.alltypes b
+  on a.id = b.id
+right outer join functional.alltypes c
+  on b.id = c.id
+inner join functional.alltypes d
+  on a.int_col = b.int_col
+---- PLAN
+PLAN-ROOT SINK
+|
+06:NESTED LOOP JOIN [INNER JOIN]
+|  predicates: a.int_col = b.int_col
+|
+|--03:SCAN HDFS [functional.alltypes d]
+|     partitions=24/24 files=24 size=478.45KB
+|
+05:HASH JOIN [RIGHT OUTER JOIN]
+|  hash predicates: b.id = c.id
+|  runtime filters: RF000 <- c.id
+|
+|--02:SCAN HDFS [functional.alltypes c]
+|     partitions=24/24 files=24 size=478.45KB
+|
+04:HASH JOIN [LEFT OUTER JOIN]
+|  hash predicates: a.id = b.id
+|
+|--01:SCAN HDFS [functional.alltypes b]
+|     partitions=24/24 files=24 size=478.45KB
+|     runtime filters: RF000 -> b.id
+|
+00:SCAN HDFS [functional.alltypes a]
+   partitions=24/24 files=24 size=478.45KB
+====
+# IMPALA-3126: Same as above but with a semi join at the end.
+select 1 from functional.alltypes a
+left outer join functional.alltypes b
+  on a.id = b.id
+right outer join functional.alltypes c
+  on b.id = c.id
+left semi join functional.alltypes d
+  on a.int_col = b.int_col
+---- PLAN
+PLAN-ROOT SINK
+|
+06:NESTED LOOP JOIN [LEFT SEMI JOIN]
+|  join predicates: a.int_col = b.int_col
+|
+|--03:SCAN HDFS [functional.alltypes d]
+|     partitions=24/24 files=24 size=478.45KB
+|
+05:HASH JOIN [RIGHT OUTER JOIN]
+|  hash predicates: b.id = c.id
+|  runtime filters: RF000 <- c.id
+|
+|--02:SCAN HDFS [functional.alltypes c]
+|     partitions=24/24 files=24 size=478.45KB
+|
+04:HASH JOIN [LEFT OUTER JOIN]
+|  hash predicates: a.id = b.id
+|
+|--01:SCAN HDFS [functional.alltypes b]
+|     partitions=24/24 files=24 size=478.45KB
+|     runtime filters: RF000 -> b.id
+|
+00:SCAN HDFS [functional.alltypes a]
+   partitions=24/24 files=24 size=478.45KB
+====