You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2016/12/15 23:00:49 UTC
[23/50] [abbrv] incubator-impala git commit: IMPALA-3126:
Conservative assignment of inner-join On-clause predicates.
IMPALA-3126: Conservative assignment of inner-join On-clause predicates.
Implements the following conservative but correct policy for assigning
predicates from the On-clause of an inner join:
If the predicate references an outer-joined tuple, then evaluate it at
the inner join that the On-clause belongs to.
Cleans up Analyzer.canEvalPredicate().
Change-Id: Idf45323ed9102ffb45c9d94a130ea3692286f215
Reviewed-on: http://gerrit.cloudera.org:8080/4982
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Internal Jenkins
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/80f85179
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/80f85179
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/80f85179
Branch: refs/heads/hadoop-next
Commit: 80f85179f99ff36d6ecad65b2041b45015ffb716
Parents: cc57a22
Author: Alex Behm <al...@cloudera.com>
Authored: Mon Nov 7 14:15:45 2016 -0800
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Fri Dec 9 02:12:46 2016 +0000
----------------------------------------------------------------------
.../org/apache/impala/analysis/Analyzer.java | 101 ++++++++++---------
.../queries/PlannerTest/outer-joins.test | 72 ++++++++++++-
2 files changed, 121 insertions(+), 52 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/80f85179/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
index 1e88862..61d1c20 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
@@ -1195,6 +1195,10 @@ public class Analyzer {
return globalState_.ijClauseByConjunct.containsKey(e.getId());
}
+ public boolean isSjConjunct(Expr e) {
+ return globalState_.sjClauseByConjunct.containsKey(e.getId());
+ }
+
public TableRef getFullOuterJoinRef(Expr e) {
return globalState_.fullOuterJoinedConjuncts.get(e.getId());
}
@@ -1353,12 +1357,24 @@ public class Analyzer {
/**
* Returns true if predicate 'e' can be correctly evaluated by a tree materializing
* 'tupleIds', otherwise false:
- * - the predicate needs to be bound by tupleIds
- * - an On clause predicate against the non-nullable side of an Outer Join clause
- * can only be correctly evaluated by the join node that materializes the
- * Outer Join clause
- * - otherwise, a predicate can only be correctly evaluated if for all outer-joined
- * referenced tids the last join to outer-join this tid has been materialized
+ * - The predicate needs to be bound by tupleIds.
+ * - For On-clause predicates:
+ * - If the predicate is from an anti-join On-clause it must be evaluated by the
+ * corresponding anti-join node.
+ * - Predicates from the On-clause of an inner or semi join are evaluated at the
+ * node that materializes the required tuple ids, unless they reference outer
+ * joined tuple ids. In that case, the predicates are evaluated at the join node
+ * of the corresponding On-clause.
+ * - Predicates referencing full-outer joined tuples are assigned at the originating
+ * join if it is a full-outer join, otherwise at the last full-outer join that does
+ * not materialize the table ref ids of the originating join.
+ * - Predicates from the On-clause of a left/right outer join are assigned at
+ * the corresponding outer join node with the exception of simple predicates
+ * that only reference a single tuple id. Those may be assigned below the
+ * outer join node if they are from the same On-clause that makes the tuple id
+ * nullable.
+ * - Otherwise, a predicate can only be correctly evaluated if for all outer-joined
+ * referenced tids the last join to outer-join this tid has been materialized.
*/
public boolean canEvalPredicate(List<TupleId> tupleIds, Expr e) {
if (!e.isBoundByTupleIds(tupleIds)) return false;
@@ -1367,58 +1383,43 @@ public class Analyzer {
if (tids.isEmpty()) return true;
if (e.isOnClauseConjunct()) {
- if (tids.size() > 1) {
- // If the conjunct is from the ON-clause of an anti join, check if we can
- // assign it to this node.
- if (isAntiJoinedConjunct(e)) return canEvalAntiJoinedConjunct(e, tupleIds);
- // bail if this is from an OJ On clause; the join node will pick
- // it up later via getUnassignedOjConjuncts()
- if (globalState_.ojClauseByConjunct.containsKey(e.getId())) return false;
- // If this is not from an OJ On clause (e.g. where clause or On clause of an
- // inner join) and is full-outer joined, we need to make sure it is not
- // assigned below the full outer join node that outer-joined it.
- return canEvalFullOuterJoinedConjunct(e, tupleIds);
+ if (isAntiJoinedConjunct(e)) return canEvalAntiJoinedConjunct(e, tupleIds);
+
+ if (isIjConjunct(e) || isSjConjunct(e)) {
+ if (!containsOuterJoinedTid(tids)) return true;
+ // If the predicate references an outer-joined tuple, then evaluate it at
+ // the join that the On-clause belongs to.
+ TableRef onClauseTableRef = null;
+ if (isIjConjunct(e)) {
+ onClauseTableRef = globalState_.ijClauseByConjunct.get(e.getId());
+ } else {
+ onClauseTableRef = globalState_.sjClauseByConjunct.get(e.getId());
+ }
+ Preconditions.checkNotNull(onClauseTableRef);
+ return tupleIds.containsAll(onClauseTableRef.getAllTableRefIds());
}
- TupleId tid = tids.get(0);
- if (globalState_.ojClauseByConjunct.containsKey(e.getId())) {
- // OJ On-clause predicate: okay if it's from
- // the same On clause that makes tid nullable
- // (otherwise e needn't be true when that tuple is set)
- if (!globalState_.outerJoinedTupleIds.containsKey(tid)) return false;
- if (globalState_.ojClauseByConjunct.get(e.getId())
- != globalState_.outerJoinedTupleIds.get(tid)) {
- return false;
- }
- // Single tuple id conjuncts specified in the FOJ On-clause are not allowed to be
- // assigned below that full outer join in the operator tree.
- TableRef tblRef = globalState_.ojClauseByConjunct.get(e.getId());
- if (tblRef.getJoinOp().isFullOuterJoin()) return false;
- } else {
- // Non-OJ On-clause conjunct.
- if (isOuterJoined(tid)) {
- // If the conjunct references an outer-joined tuple, then evaluate the
- // conjunct at the join that the On-clause belongs to.
- TableRef onClauseTableRef = globalState_.ijClauseByConjunct.get(e.getId());
- Preconditions.checkNotNull(onClauseTableRef);
- return tupleIds.containsAll(onClauseTableRef.getAllTableRefIds());
- }
- // If this single tid conjunct is from the On-clause of an anti-join, check if we
- // can assign it to this node.
- if (isAntiJoinedConjunct(e)) return canEvalAntiJoinedConjunct(e, tupleIds);
+ if (isFullOuterJoined(e)) return canEvalFullOuterJoinedConjunct(e, tupleIds);
+ if (isOjConjunct(e)) {
+ // Force this predicate to be evaluated by the corresponding outer join node.
+ // The join node will pick up the predicate later via getUnassignedOjConjuncts().
+ if (tids.size() > 1) return false;
+ // Optimization for single-tid predicates: Legal to assign below the outer join
+ // if the predicate is from the same On-clause that makes tid nullable
+ // (otherwise e needn't be true when that tuple is set).
+ TupleId tid = tids.get(0);
+ return globalState_.ojClauseByConjunct.get(e.getId()) == getLastOjClause(tid);
}
- // Single tid predicate that is not from an OJ On-clause and is outer-joined by a
- // full outer join cannot be assigned below that full outer join in the
- // operator tree.
- return canEvalFullOuterJoinedConjunct(e, tupleIds);
+
+ // Should have returned in one of the cases above.
+ Preconditions.checkState(false);
}
- if (isAntiJoinedConjunct(e)) return canEvalAntiJoinedConjunct(e, tupleIds);
for (TupleId tid: tids) {
TableRef rhsRef = getLastOjClause(tid);
- // this is not outer-joined; ignore
+ // Ignore 'tid' because it is not outer-joined.
if (rhsRef == null) continue;
- // check whether the last join to outer-join 'tid' is materialized by tupleIds
+ // Check whether the last join to outer-join 'tid' is materialized by tupleIds.
if (!tupleIds.containsAll(rhsRef.getAllTableRefIds())) return false;
}
return true;
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/80f85179/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test b/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test
index 95d16f8..5b82c2d 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test
@@ -582,7 +582,7 @@ PLAN-ROOT SINK
|
05:HASH JOIN [INNER JOIN]
| hash predicates: b.smallint_col = c.smallint_col
-| other predicates: b.id < 10
+| other predicates: a.int_col < b.int_col, b.id < 10
| runtime filters: RF000 <- c.smallint_col
|
|--02:SCAN HDFS [functional.alltypes c]
@@ -590,7 +590,6 @@ PLAN-ROOT SINK
|
04:HASH JOIN [FULL OUTER JOIN]
| hash predicates: a.id = b.id
-| other predicates: a.int_col < b.int_col
|
|--01:SCAN HDFS [functional.alltypes b]
| partitions=24/24 files=24 size=478.45KB
@@ -948,3 +947,72 @@ PLAN-ROOT SINK
00:SCAN HDFS [functional.alltypes t1]
partitions=24/24 files=24 size=478.45KB
====
+# IMPALA-3126: Test assignment of an inner join On-clause predicate. The predicate
+# may not be assigned below the join materializing 'd'.
+select 1 from functional.alltypes a
+left outer join functional.alltypes b
+ on a.id = b.id
+right outer join functional.alltypes c
+ on b.id = c.id
+inner join functional.alltypes d
+ on a.int_col = b.int_col
+---- PLAN
+PLAN-ROOT SINK
+|
+06:NESTED LOOP JOIN [INNER JOIN]
+| predicates: a.int_col = b.int_col
+|
+|--03:SCAN HDFS [functional.alltypes d]
+| partitions=24/24 files=24 size=478.45KB
+|
+05:HASH JOIN [RIGHT OUTER JOIN]
+| hash predicates: b.id = c.id
+| runtime filters: RF000 <- c.id
+|
+|--02:SCAN HDFS [functional.alltypes c]
+| partitions=24/24 files=24 size=478.45KB
+|
+04:HASH JOIN [LEFT OUTER JOIN]
+| hash predicates: a.id = b.id
+|
+|--01:SCAN HDFS [functional.alltypes b]
+| partitions=24/24 files=24 size=478.45KB
+| runtime filters: RF000 -> b.id
+|
+00:SCAN HDFS [functional.alltypes a]
+ partitions=24/24 files=24 size=478.45KB
+====
+# IMPALA-3126: Same as above but with a semi join at the end.
+select 1 from functional.alltypes a
+left outer join functional.alltypes b
+ on a.id = b.id
+right outer join functional.alltypes c
+ on b.id = c.id
+left semi join functional.alltypes d
+ on a.int_col = b.int_col
+---- PLAN
+PLAN-ROOT SINK
+|
+06:NESTED LOOP JOIN [LEFT SEMI JOIN]
+| join predicates: a.int_col = b.int_col
+|
+|--03:SCAN HDFS [functional.alltypes d]
+| partitions=24/24 files=24 size=478.45KB
+|
+05:HASH JOIN [RIGHT OUTER JOIN]
+| hash predicates: b.id = c.id
+| runtime filters: RF000 <- c.id
+|
+|--02:SCAN HDFS [functional.alltypes c]
+| partitions=24/24 files=24 size=478.45KB
+|
+04:HASH JOIN [LEFT OUTER JOIN]
+| hash predicates: a.id = b.id
+|
+|--01:SCAN HDFS [functional.alltypes b]
+| partitions=24/24 files=24 size=478.45KB
+| runtime filters: RF000 -> b.id
+|
+00:SCAN HDFS [functional.alltypes a]
+ partitions=24/24 files=24 size=478.45KB
+====