You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by gu...@apache.org on 2014/08/16 00:30:46 UTC

svn commit: r1618290 - in /hive/branches/cbo/ql/src: java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/ java/org/apache/hadoop/hive/ql/parse/ test/queries/clientpositive/ test/results/clientpositive/

Author: gunther
Date: Fri Aug 15 22:30:46 2014
New Revision: 1618290

URL: http://svn.apache.org/r1618290
Log:
HIVE-7742: CBO: Predicate Push Down to Honor Hive Join Condition restrictions (Laljo John Pullokkaran via Gunther Hagleitner)

Modified:
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    hive/branches/cbo/ql/src/test/queries/clientpositive/cbo_correctness.q
    hive/branches/cbo/ql/src/test/results/clientpositive/cbo_correctness.q.out

Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java?rev=1618290&r1=1618289&r2=1618290&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java Fri Aug 15 22:30:46 2014
@@ -1,6 +1,7 @@
 package org.apache.hadoop.hive.ql.optimizer.optiq.rules;
 
 import java.util.ArrayList;
+import java.util.BitSet;
 import java.util.List;
 import java.util.ListIterator;
 
@@ -15,6 +16,7 @@ import org.eigenbase.relopt.RelOptRule;
 import org.eigenbase.relopt.RelOptRuleCall;
 import org.eigenbase.relopt.RelOptRuleOperand;
 import org.eigenbase.relopt.RelOptUtil;
+import org.eigenbase.relopt.RelOptUtil.InputFinder;
 import org.eigenbase.rex.RexBuilder;
 import org.eigenbase.rex.RexCall;
 import org.eigenbase.rex.RexNode;
@@ -113,7 +115,7 @@ public abstract class HivePushFilterPast
     final Holder<JoinRelType> joinTypeHolder = Holder.of(join.getJoinType());
     if (RelOptUtil.classifyFilters(join, aboveFilters,
         join.getJoinType(), !join.getJoinType().generatesNullsOnLeft(), !join.getJoinType()
-        .generatesNullsOnRight(), joinFilters, leftFilters, rightFilters, joinTypeHolder, smart)) {
+        .generatesNullsOnRight(), joinFilters, leftFilters, rightFilters, joinTypeHolder, false)) {
       filterPushed = true;
     }
 
@@ -128,7 +130,18 @@ public abstract class HivePushFilterPast
       if (exp instanceof RexCall) {
         RexCall c = (RexCall) exp;
         if (c.getOperator().getKind() == SqlKind.EQUALS) {
-          continue;
+          boolean validHiveJoinFilter = true;
+          for (RexNode rn : c.getOperands()) {
+            // NOTE: Hive dis-allows projections from both left & right side
+            // of join condition. Example: Hive disallows
+            // (r1.x=r2.x)=(r1.y=r2.y) on join condition.
+            if (filterRefersToBothSidesOfJoin(rn, join)) {
+              validHiveJoinFilter = false;
+              break;
+            }
+          }
+          if (validHiveJoinFilter)
+            continue;
         }
       }
       aboveFilters.add(exp);
@@ -148,7 +161,7 @@ public abstract class HivePushFilterPast
     // not on the side which is preserved.
     if (RelOptUtil.classifyFilters(join, joinFilters, null, !join
         .getJoinType().generatesNullsOnRight(), !join.getJoinType()
-        .generatesNullsOnLeft(), joinFilters, leftFilters, rightFilters, joinTypeHolder, smart)) {
+        .generatesNullsOnLeft(), joinFilters, leftFilters, rightFilters, joinTypeHolder, false)) {
       filterPushed = true;
     }
 
@@ -240,6 +253,25 @@ public abstract class HivePushFilterPast
     }
     return predicate.isAlwaysTrue();
   }
+
+  private boolean filterRefersToBothSidesOfJoin(RexNode filter, JoinRelBase j) {
+    boolean refersToBothSides = false;
+
+    int joinNoOfProjects = j.getRowType().getFieldCount();
+    BitSet filterProjs = new BitSet(joinNoOfProjects);
+    BitSet allLeftProjs = new BitSet(joinNoOfProjects);
+    BitSet allRightProjs = new BitSet(joinNoOfProjects);
+    allLeftProjs.set(0, j.getInput(0).getRowType().getFieldCount(), true);
+    allRightProjs.set(j.getInput(0).getRowType().getFieldCount(), joinNoOfProjects, true);
+
+    InputFinder inputFinder = new InputFinder(filterProjs);
+    filter.accept(inputFinder);
+
+    if (allLeftProjs.intersects(filterProjs) && allRightProjs.intersects(filterProjs))
+      refersToBothSides = true;
+
+    return refersToBothSides;
+  }
 }
 
 // End PushFilterPastJoinRule.java

Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1618290&r1=1618289&r2=1618290&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Fri Aug 15 22:30:46 2014
@@ -11861,6 +11861,7 @@ public class SemanticAnalyzer extends Ba
       cluster.setMetadataProvider(new CachingRelMetadataProvider(chainedProvider, hepPlanner));
 
       RelNode rootRel = optiqPreCboPlan;
+      hepPlanner.setRoot(rootRel);
       if (!optiqPreCboPlan.getTraitSet().equals(desiredTraits)) {
         rootRel = hepPlanner.changeTraits(optiqPreCboPlan, desiredTraits);
       }

Modified: hive/branches/cbo/ql/src/test/queries/clientpositive/cbo_correctness.q
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/queries/clientpositive/cbo_correctness.q?rev=1618290&r1=1618289&r2=1618290&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/test/queries/clientpositive/cbo_correctness.q (original)
+++ hive/branches/cbo/ql/src/test/queries/clientpositive/cbo_correctness.q Fri Aug 15 22:30:46 2014
@@ -205,9 +205,9 @@ with q1 as ( select c_int from q2 where 
 q2 as ( select c_int,c_boolean from v1  where value = '1')
 select sum(c_int) from (select c_int from q1) a;
 
-with q1 as ( select t1.c_int c_int from q2 join t1 where q2.c_int = t1.c_int),
-q2 as ( select c_int,c_boolean from v1  where value = '1')
-select count(*) from q1 join q2 join v4 on q1.c_int = q2.c_int and v4.c_int = q2.c_int;
+--with q1 as ( select t1.c_int c_int from q2 join t1 where q2.c_int = t1.c_int),
+--q2 as ( select c_int,c_boolean from v1  where value = '1')
+--select count(*) from q1 join q2 join v4 on q1.c_int = q2.c_int and v4.c_int = q2.c_int;
 
 
 drop view v1;

Modified: hive/branches/cbo/ql/src/test/results/clientpositive/cbo_correctness.q.out
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/results/clientpositive/cbo_correctness.q.out?rev=1618290&r1=1618289&r2=1618290&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/test/results/clientpositive/cbo_correctness.q.out (original)
+++ hive/branches/cbo/ql/src/test/results/clientpositive/cbo_correctness.q.out Fri Aug 15 22:30:46 2014
@@ -7190,7 +7190,7 @@ null	NULL	null	NULL
 null	NULL	null	NULL
 null	NULL	null	NULL
 PREHOOK: query: -- 5. Test Select + Join + FIL + TS
-select t1.c_int, t2.c_int from t1 join             t2 on t1.key=t2.key where (t1.c_int + t2.c_int == 2) and (t1.c_int > 0 or t2.c_float >= 0)
+select t1.c_int, t2.c_int from t1 join t2 on t1.key=t2.key where (t1.c_int + t2.c_int == 2) and (t1.c_int > 0 or t2.c_float >= 0)
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
 PREHOOK: Input: default@t1@dt=2014
@@ -7198,7 +7198,7 @@ PREHOOK: Input: default@t2
 PREHOOK: Input: default@t2@dt=2014
 #### A masked pattern was here ####
 POSTHOOK: query: -- 5. Test Select + Join + FIL + TS
-select t1.c_int, t2.c_int from t1 join             t2 on t1.key=t2.key where (t1.c_int + t2.c_int == 2) and (t1.c_int > 0 or t2.c_float >= 0)
+select t1.c_int, t2.c_int from t1 join t2 on t1.key=t2.key where (t1.c_int + t2.c_int == 2) and (t1.c_int > 0 or t2.c_float >= 0)
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t1@dt=2014
@@ -15963,30 +15963,21 @@ POSTHOOK: Input: default@t1@dt=2014
 POSTHOOK: Input: default@v1
 #### A masked pattern was here ####
 2
-PREHOOK: query: with q1 as ( select t1.c_int c_int from q2 join t1 where q2.c_int = t1.c_int),
-q2 as ( select c_int,c_boolean from v1  where value = '1')
-select count(*) from q1 join q2 join v4 on q1.c_int = q2.c_int and v4.c_int = q2.c_int
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1
-PREHOOK: Input: default@t1@dt=2014
-PREHOOK: Input: default@v1
-PREHOOK: Input: default@v4
-#### A masked pattern was here ####
-POSTHOOK: query: with q1 as ( select t1.c_int c_int from q2 join t1 where q2.c_int = t1.c_int),
-q2 as ( select c_int,c_boolean from v1  where value = '1')
-select count(*) from q1 join q2 join v4 on q1.c_int = q2.c_int and v4.c_int = q2.c_int
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1
-POSTHOOK: Input: default@t1@dt=2014
-POSTHOOK: Input: default@v1
-POSTHOOK: Input: default@v4
-#### A masked pattern was here ####
-31104
-PREHOOK: query: drop view v1
+PREHOOK: query: --with q1 as ( select t1.c_int c_int from q2 join t1 where q2.c_int = t1.c_int),
+--q2 as ( select c_int,c_boolean from v1  where value = '1')
+--select count(*) from q1 join q2 join v4 on q1.c_int = q2.c_int and v4.c_int = q2.c_int;
+
+
+drop view v1
 PREHOOK: type: DROPVIEW
 PREHOOK: Input: default@v1
 PREHOOK: Output: default@v1
-POSTHOOK: query: drop view v1
+POSTHOOK: query: --with q1 as ( select t1.c_int c_int from q2 join t1 where q2.c_int = t1.c_int),
+--q2 as ( select c_int,c_boolean from v1  where value = '1')
+--select count(*) from q1 join q2 join v4 on q1.c_int = q2.c_int and v4.c_int = q2.c_int;
+
+
+drop view v1
 POSTHOOK: type: DROPVIEW
 POSTHOOK: Input: default@v1
 POSTHOOK: Output: default@v1