You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2017/02/07 20:59:32 UTC

[64/70] [abbrv] hive git commit: HIVE-15808: Remove semijoin reduction branch if it is on bigtable along with hash join (Deepak Jaiswal, reviewed by Jason Dere)

HIVE-15808: Remove semijoin reduction branch if it is on bigtable along with hash join (Deepak Jaiswal, reviewed by Jason Dere)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f6cdbc87
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f6cdbc87
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f6cdbc87

Branch: refs/heads/hive-14535
Commit: f6cdbc87955aa5cdb83f174a73db9a7d8071f78b
Parents: 3ed7dc2
Author: Gunther Hagleitner <gu...@apache.org>
Authored: Tue Feb 7 11:11:09 2017 -0800
Committer: Gunther Hagleitner <gu...@apache.org>
Committed: Tue Feb 7 11:11:09 2017 -0800

----------------------------------------------------------------------
 .../hive/ql/optimizer/ConvertJoinMapJoin.java   | 64 +++++++++++---------
 .../hadoop/hive/ql/parse/GenTezUtils.java       |  8 +--
 2 files changed, 39 insertions(+), 33 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/f6cdbc87/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
index 0f9e86b..e3b293a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
@@ -775,51 +775,57 @@ public class ConvertJoinMapJoin implements NodeProcessor {
     return mapJoinOp;
   }
 
-  // Remove any semijoin branch associated with mapjoin's parent's operator
-  // pipeline which can cause a cycle after mapjoin optimization.
+  // Remove any semijoin branch associated with hashjoin's parent's operator
+  // pipeline which can cause a cycle after hashjoin optimization.
   private void removeCycleCreatingSemiJoinOps(MapJoinOperator mapjoinOp,
                                               Operator<?> parentSelectOpOfBigTable,
                                               ParseContext parseContext) throws SemanticException {
-    boolean semiJoinCycle = false;
-    ReduceSinkOperator rs = null;
-    TableScanOperator ts = null;
+    Map<ReduceSinkOperator, TableScanOperator> semiJoinMap =
+            new HashMap<ReduceSinkOperator, TableScanOperator>();
     for (Operator<?> op : parentSelectOpOfBigTable.getChildOperators()) {
       if (!(op instanceof SelectOperator)) {
         continue;
       }
 
-      while (op.getChildOperators().size() > 0 ) {
+      while (op.getChildOperators().size() > 0) {
         op = op.getChildOperators().get(0);
-        if (!(op instanceof ReduceSinkOperator)) {
-          continue;
-        }
-        rs = (ReduceSinkOperator) op;
-        ts = parseContext.getRsOpToTsOpMap().get(rs);
-        if (ts == null) {
+      }
+
+      // If not ReduceSink Op, skip
+      if (!(op instanceof ReduceSinkOperator)) {
+        continue;
+      }
+
+      ReduceSinkOperator rs = (ReduceSinkOperator) op;
+      TableScanOperator ts = parseContext.getRsOpToTsOpMap().get(rs);
+      if (ts == null) {
+        // skip, no semijoin branch
+        continue;
+      }
+
+      // Found a semijoin branch.
+      for (Operator<?> parent : mapjoinOp.getParentOperators()) {
+        if (!(parent instanceof ReduceSinkOperator)) {
           continue;
         }
-        for (Operator<?> parent : mapjoinOp.getParentOperators()) {
-          if (!(parent instanceof ReduceSinkOperator)) {
-            continue;
-          }
 
-          Set<TableScanOperator> tsOps = OperatorUtils.findOperatorsUpstream(parent,
-                  TableScanOperator.class);
-          for (TableScanOperator parentTS : tsOps) {
-            // If the parent is same as the ts, then we have a cycle.
-            if (ts == parentTS) {
-              semiJoinCycle = true;
-              break;
-            }
+        Set<TableScanOperator> tsOps = OperatorUtils.findOperatorsUpstream(parent,
+                TableScanOperator.class);
+        for (TableScanOperator parentTS : tsOps) {
+          // If the parent is same as the ts, then we have a cycle.
+          if (ts == parentTS) {
+            semiJoinMap.put(rs, ts);
+            break;
           }
         }
       }
     }
-
-    // By design there can be atmost 1 such cycle.
-    if (semiJoinCycle) {
-      GenTezUtils.removeBranch(rs);
-      GenTezUtils.removeSemiJoinOperator(parseContext, rs, ts);
+    if (semiJoinMap.size() > 0) {
+      for (ReduceSinkOperator rs : semiJoinMap.keySet()) {
+        GenTezUtils.removeBranch(rs);
+        GenTezUtils.removeSemiJoinOperator(parseContext, rs,
+                semiJoinMap.get(rs));
+      }
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/f6cdbc87/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
index aee74ad..7f5fdff 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
@@ -265,7 +265,6 @@ public class GenTezUtils {
           for (ReduceSinkOperator rs : rsOpToTsOpMap.keySet()) {
             if (rsOpToTsOpMap.get(rs) == orig) {
               rsOpToTsOpMap.put(rs, (TableScanOperator) newRoot);
-              break;
             }
           }
         }
@@ -569,7 +568,8 @@ public class GenTezUtils {
             TypeInfoFactory.booleanTypeInfo, Boolean.TRUE);
     DynamicValuePredicateContext filterDynamicValuePredicatesCollection =
             new DynamicValuePredicateContext();
-    collectDynamicValuePredicates(((FilterOperator)(ts.getChildOperators().get(0))).getConf().getPredicate(),
+    FilterDesc filterDesc = ((FilterOperator)(ts.getChildOperators().get(0))).getConf();
+    collectDynamicValuePredicates(filterDesc.getPredicate(),
             filterDynamicValuePredicatesCollection);
     for (ExprNodeDesc nodeToRemove : filterDynamicValuePredicatesCollection
             .childParentMapping.keySet()) {
@@ -594,8 +594,8 @@ public class GenTezUtils {
         ExprNodeDesc nodeParent = filterDynamicValuePredicatesCollection
                 .childParentMapping.get(nodeToRemove);
         if (nodeParent == null) {
-          // This was the only predicate, set filter expression to null
-          ts.getConf().setFilterExpr(null);
+          // This was the only predicate, set filter expression to const
+          filterDesc.setPredicate(constNode);
         } else {
           int i = nodeParent.getChildren().indexOf(nodeToRemove);
           nodeParent.getChildren().remove(i);