You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by gu...@apache.org on 2014/08/16 00:30:46 UTC
svn commit: r1618290 - in /hive/branches/cbo/ql/src:
java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/
java/org/apache/hadoop/hive/ql/parse/ test/queries/clientpositive/
test/results/clientpositive/
Author: gunther
Date: Fri Aug 15 22:30:46 2014
New Revision: 1618290
URL: http://svn.apache.org/r1618290
Log:
HIVE-7742: CBO: Predicate Push Down to Honor Hive Join Condition restrictions (Laljo John Pullokkaran via Gunther Hagleitner)
Modified:
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
hive/branches/cbo/ql/src/test/queries/clientpositive/cbo_correctness.q
hive/branches/cbo/ql/src/test/results/clientpositive/cbo_correctness.q.out
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java?rev=1618290&r1=1618289&r2=1618290&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java Fri Aug 15 22:30:46 2014
@@ -1,6 +1,7 @@
package org.apache.hadoop.hive.ql.optimizer.optiq.rules;
import java.util.ArrayList;
+import java.util.BitSet;
import java.util.List;
import java.util.ListIterator;
@@ -15,6 +16,7 @@ import org.eigenbase.relopt.RelOptRule;
import org.eigenbase.relopt.RelOptRuleCall;
import org.eigenbase.relopt.RelOptRuleOperand;
import org.eigenbase.relopt.RelOptUtil;
+import org.eigenbase.relopt.RelOptUtil.InputFinder;
import org.eigenbase.rex.RexBuilder;
import org.eigenbase.rex.RexCall;
import org.eigenbase.rex.RexNode;
@@ -113,7 +115,7 @@ public abstract class HivePushFilterPast
final Holder<JoinRelType> joinTypeHolder = Holder.of(join.getJoinType());
if (RelOptUtil.classifyFilters(join, aboveFilters,
join.getJoinType(), !join.getJoinType().generatesNullsOnLeft(), !join.getJoinType()
- .generatesNullsOnRight(), joinFilters, leftFilters, rightFilters, joinTypeHolder, smart)) {
+ .generatesNullsOnRight(), joinFilters, leftFilters, rightFilters, joinTypeHolder, false)) {
filterPushed = true;
}
@@ -128,7 +130,18 @@ public abstract class HivePushFilterPast
if (exp instanceof RexCall) {
RexCall c = (RexCall) exp;
if (c.getOperator().getKind() == SqlKind.EQUALS) {
- continue;
+ boolean validHiveJoinFilter = true;
+ for (RexNode rn : c.getOperands()) {
+ // NOTE: Hive dis-allows projections from both left & right side
+ // of join condition. Example: Hive disallows
+ // (r1.x=r2.x)=(r1.y=r2.y) on join condition.
+ if (filterRefersToBothSidesOfJoin(rn, join)) {
+ validHiveJoinFilter = false;
+ break;
+ }
+ }
+ if (validHiveJoinFilter)
+ continue;
}
}
aboveFilters.add(exp);
@@ -148,7 +161,7 @@ public abstract class HivePushFilterPast
// not on the side which is preserved.
if (RelOptUtil.classifyFilters(join, joinFilters, null, !join
.getJoinType().generatesNullsOnRight(), !join.getJoinType()
- .generatesNullsOnLeft(), joinFilters, leftFilters, rightFilters, joinTypeHolder, smart)) {
+ .generatesNullsOnLeft(), joinFilters, leftFilters, rightFilters, joinTypeHolder, false)) {
filterPushed = true;
}
@@ -240,6 +253,25 @@ public abstract class HivePushFilterPast
}
return predicate.isAlwaysTrue();
}
+
+ private boolean filterRefersToBothSidesOfJoin(RexNode filter, JoinRelBase j) {
+ boolean refersToBothSides = false;
+
+ int joinNoOfProjects = j.getRowType().getFieldCount();
+ BitSet filterProjs = new BitSet(joinNoOfProjects);
+ BitSet allLeftProjs = new BitSet(joinNoOfProjects);
+ BitSet allRightProjs = new BitSet(joinNoOfProjects);
+ allLeftProjs.set(0, j.getInput(0).getRowType().getFieldCount(), true);
+ allRightProjs.set(j.getInput(0).getRowType().getFieldCount(), joinNoOfProjects, true);
+
+ InputFinder inputFinder = new InputFinder(filterProjs);
+ filter.accept(inputFinder);
+
+ if (allLeftProjs.intersects(filterProjs) && allRightProjs.intersects(filterProjs))
+ refersToBothSides = true;
+
+ return refersToBothSides;
+ }
}
// End PushFilterPastJoinRule.java
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1618290&r1=1618289&r2=1618290&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Fri Aug 15 22:30:46 2014
@@ -11861,6 +11861,7 @@ public class SemanticAnalyzer extends Ba
cluster.setMetadataProvider(new CachingRelMetadataProvider(chainedProvider, hepPlanner));
RelNode rootRel = optiqPreCboPlan;
+ hepPlanner.setRoot(rootRel);
if (!optiqPreCboPlan.getTraitSet().equals(desiredTraits)) {
rootRel = hepPlanner.changeTraits(optiqPreCboPlan, desiredTraits);
}
Modified: hive/branches/cbo/ql/src/test/queries/clientpositive/cbo_correctness.q
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/queries/clientpositive/cbo_correctness.q?rev=1618290&r1=1618289&r2=1618290&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/test/queries/clientpositive/cbo_correctness.q (original)
+++ hive/branches/cbo/ql/src/test/queries/clientpositive/cbo_correctness.q Fri Aug 15 22:30:46 2014
@@ -205,9 +205,9 @@ with q1 as ( select c_int from q2 where
q2 as ( select c_int,c_boolean from v1 where value = '1')
select sum(c_int) from (select c_int from q1) a;
-with q1 as ( select t1.c_int c_int from q2 join t1 where q2.c_int = t1.c_int),
-q2 as ( select c_int,c_boolean from v1 where value = '1')
-select count(*) from q1 join q2 join v4 on q1.c_int = q2.c_int and v4.c_int = q2.c_int;
+--with q1 as ( select t1.c_int c_int from q2 join t1 where q2.c_int = t1.c_int),
+--q2 as ( select c_int,c_boolean from v1 where value = '1')
+--select count(*) from q1 join q2 join v4 on q1.c_int = q2.c_int and v4.c_int = q2.c_int;
drop view v1;
Modified: hive/branches/cbo/ql/src/test/results/clientpositive/cbo_correctness.q.out
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/results/clientpositive/cbo_correctness.q.out?rev=1618290&r1=1618289&r2=1618290&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/test/results/clientpositive/cbo_correctness.q.out (original)
+++ hive/branches/cbo/ql/src/test/results/clientpositive/cbo_correctness.q.out Fri Aug 15 22:30:46 2014
@@ -7190,7 +7190,7 @@ null NULL null NULL
null NULL null NULL
null NULL null NULL
PREHOOK: query: -- 5. Test Select + Join + FIL + TS
-select t1.c_int, t2.c_int from t1 join t2 on t1.key=t2.key where (t1.c_int + t2.c_int == 2) and (t1.c_int > 0 or t2.c_float >= 0)
+select t1.c_int, t2.c_int from t1 join t2 on t1.key=t2.key where (t1.c_int + t2.c_int == 2) and (t1.c_int > 0 or t2.c_float >= 0)
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
PREHOOK: Input: default@t1@dt=2014
@@ -7198,7 +7198,7 @@ PREHOOK: Input: default@t2
PREHOOK: Input: default@t2@dt=2014
#### A masked pattern was here ####
POSTHOOK: query: -- 5. Test Select + Join + FIL + TS
-select t1.c_int, t2.c_int from t1 join t2 on t1.key=t2.key where (t1.c_int + t2.c_int == 2) and (t1.c_int > 0 or t2.c_float >= 0)
+select t1.c_int, t2.c_int from t1 join t2 on t1.key=t2.key where (t1.c_int + t2.c_int == 2) and (t1.c_int > 0 or t2.c_float >= 0)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Input: default@t1@dt=2014
@@ -15963,30 +15963,21 @@ POSTHOOK: Input: default@t1@dt=2014
POSTHOOK: Input: default@v1
#### A masked pattern was here ####
2
-PREHOOK: query: with q1 as ( select t1.c_int c_int from q2 join t1 where q2.c_int = t1.c_int),
-q2 as ( select c_int,c_boolean from v1 where value = '1')
-select count(*) from q1 join q2 join v4 on q1.c_int = q2.c_int and v4.c_int = q2.c_int
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1
-PREHOOK: Input: default@t1@dt=2014
-PREHOOK: Input: default@v1
-PREHOOK: Input: default@v4
-#### A masked pattern was here ####
-POSTHOOK: query: with q1 as ( select t1.c_int c_int from q2 join t1 where q2.c_int = t1.c_int),
-q2 as ( select c_int,c_boolean from v1 where value = '1')
-select count(*) from q1 join q2 join v4 on q1.c_int = q2.c_int and v4.c_int = q2.c_int
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1
-POSTHOOK: Input: default@t1@dt=2014
-POSTHOOK: Input: default@v1
-POSTHOOK: Input: default@v4
-#### A masked pattern was here ####
-31104
-PREHOOK: query: drop view v1
+PREHOOK: query: --with q1 as ( select t1.c_int c_int from q2 join t1 where q2.c_int = t1.c_int),
+--q2 as ( select c_int,c_boolean from v1 where value = '1')
+--select count(*) from q1 join q2 join v4 on q1.c_int = q2.c_int and v4.c_int = q2.c_int;
+
+
+drop view v1
PREHOOK: type: DROPVIEW
PREHOOK: Input: default@v1
PREHOOK: Output: default@v1
-POSTHOOK: query: drop view v1
+POSTHOOK: query: --with q1 as ( select t1.c_int c_int from q2 join t1 where q2.c_int = t1.c_int),
+--q2 as ( select c_int,c_boolean from v1 where value = '1')
+--select count(*) from q1 join q2 join v4 on q1.c_int = q2.c_int and v4.c_int = q2.c_int;
+
+
+drop view v1
POSTHOOK: type: DROPVIEW
POSTHOOK: Input: default@v1
POSTHOOK: Output: default@v1