You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jd...@apache.org on 2017/05/11 04:38:08 UTC

hive git commit: HIVE-16610: Semijoin Hint : Should be able to handle more than one hint per alias (Deepak Jaiswal, reviewed by Jason Dere)

Repository: hive
Updated Branches:
  refs/heads/master fd6f8da68 -> ee91b8ec5


HIVE-16610: Semijoin Hint : Should be able to handle more than one hint per alias (Deepak Jaiswal, reviewed by Jason Dere)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ee91b8ec
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ee91b8ec
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ee91b8ec

Branch: refs/heads/master
Commit: ee91b8ec51795803c4c2817c6c2ced8f933f4b86
Parents: fd6f8da
Author: Jason Dere <jd...@hortonworks.com>
Authored: Wed May 10 21:37:34 2017 -0700
Committer: Jason Dere <jd...@hortonworks.com>
Committed: Wed May 10 21:37:34 2017 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/conf/HiveConf.java   |   2 -
 .../DynamicPartitionPruningOptimization.java    | 129 ++++++++-----
 .../hadoop/hive/ql/parse/ParseContext.java      |   6 +-
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |  88 ++++-----
 .../hadoop/hive/ql/parse/SemiJoinHint.java      |   8 +-
 .../hadoop/hive/ql/plan/ExprNodeDescUtils.java  |   7 +-
 .../test/queries/clientpositive/semijoin_hint.q |  27 ++-
 .../llap/dynamic_semijoin_reduction.q.out       |   4 +-
 .../llap/dynamic_semijoin_reduction_2.q.out     |   4 +-
 .../llap/dynamic_semijoin_user_level.q.out      |   2 +-
 .../clientpositive/llap/semijoin_hint.q.out     | 185 ++++++-------------
 11 files changed, 199 insertions(+), 263 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/ee91b8ec/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 99c26ce..73e0290 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2907,8 +2907,6 @@ public class HiveConf extends Configuration {
             "Big table for runtime filteting should be of atleast this size"),
     TEZ_DYNAMIC_SEMIJOIN_REDUCTION_THRESHOLD("hive.tez.dynamic.semijoin.reduction.threshold", (float) 0.50,
             "Only perform semijoin optimization if the estimated benefit at or above this fraction of the target table"),
-    TEZ_DYNAMIC_SEMIJOIN_REDUCTION_HINT_ONLY("hive.tez.dynamic.semijoin.reduction.hint.only", false,
-            "When true, only enforce semijoin when a hint is provided"),
     TEZ_SMB_NUMBER_WAVES(
         "hive.tez.smb.number.waves",
         (float) 0.5,

http://git-wip-us.apache.org/repos/asf/hive/blob/ee91b8ec/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
index f56b9cb..8a62982 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
@@ -27,6 +27,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Stack;
 
+import com.google.common.base.Preconditions;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.ql.exec.*;
@@ -212,36 +213,38 @@ public class DynamicPartitionPruningOptimization implements NodeProcessor {
           if (semiJoin && ts.getConf().getFilterExpr() != null) {
             LOG.debug("Initiate semijoin reduction for " + column + " ("
                 + ts.getConf().getFilterExpr().getExprString());
-            // Get the table name from which the min-max values and bloom filter will come.
-            Operator<?> op = ctx.generator;
-
-            while (!(op == null || op instanceof TableScanOperator)) {
-              op = op.getParentOperators().get(0);
-            }
-            String tableAlias = (op == null ? "" : ((TableScanOperator) op).getConf().getAlias());
 
             StringBuilder internalColNameBuilder = new StringBuilder();
             StringBuilder colNameBuilder = new StringBuilder();
-            if (getColumnName(ctx, internalColNameBuilder, colNameBuilder)) {
+
+            // Apply best effort to fetch the correct table alias. If not
+            // found, fall back to old logic.
+            StringBuilder tabAliasBuilder = new StringBuilder();
+            if (getColumnInfo(ctx, internalColNameBuilder, colNameBuilder, tabAliasBuilder)) {
               String colName = colNameBuilder.toString();
+              String tableAlias;
+              if (tabAliasBuilder.length() > 0) {
+                tableAlias = tabAliasBuilder.toString();
+              } else {
+                Operator<?> op = ctx.generator;
+
+                while (!(op == null || op instanceof TableScanOperator)) {
+                  op = op.getParentOperators().get(0);
+                }
+                tableAlias = (op == null ? "" : ((TableScanOperator) op).
+                        getConf().getAlias());
+              }
+
+              // Use the tableAlias to generate keyBaseAlias
               keyBaseAlias = ctx.generator.getOperatorId() + "_" + tableAlias
                       + "_" + colName;
-              Map<String, SemiJoinHint> hints = parseContext.getSemiJoinHints();
+              Map<String, List<SemiJoinHint>> hints = parseContext.getSemiJoinHints();
               if (hints != null) {
-                if (hints.size() > 0) {
-                  SemiJoinHint sjHint = hints.get(tableAlias);
-                  if (sjHint != null && sjHint.getColName() != null &&
-                          !colName.equals(sjHint.getColName())) {
-                    LOG.debug("Removed hint due to column mismatch + Col = " + colName + " hint column = " + sjHint.getColName());
-                    sjHint = null;
-                  }
-                  semiJoinAttempted = generateSemiJoinOperatorPlan(
-                          ctx, parseContext, ts, keyBaseAlias,
-                          internalColNameBuilder.toString(), colName, sjHint);
-                  if (!semiJoinAttempted && sjHint != null) {
-                    throw new SemanticException("The user hint to enforce semijoin failed required conditions");
-                  }
-                }
+                // Create semijoin optimizations ONLY for hinted columns
+                semiJoinAttempted = processSemiJoinHints(
+                        parseContext, ctx, hints, tableAlias,
+                        internalColNameBuilder.toString(), colName, ts,
+                        keyBaseAlias);
               } else {
                 // fallback to regular logic
                 semiJoinAttempted = generateSemiJoinOperatorPlan(
@@ -297,16 +300,30 @@ public class DynamicPartitionPruningOptimization implements NodeProcessor {
   }
 
   // Given a key, find the corresponding column name.
-  private boolean getColumnName(DynamicListContext ctx, StringBuilder internalColName,
-                                StringBuilder colName) {
+  private boolean getColumnInfo(DynamicListContext ctx, StringBuilder internalColName,
+                                StringBuilder colName, StringBuilder tabAlias) {
     ExprNodeDesc exprNodeDesc = ctx.generator.getConf().getKeyCols().get(ctx.desc.getKeyIndex());
     ExprNodeColumnDesc colExpr = ExprNodeDescUtils.getColumnExpr(exprNodeDesc);
 
     if (colExpr == null) {
       return false;
     }
-
     internalColName.append(colExpr.getColumn());
+
+    // fetch table alias
+    ExprNodeDescUtils.ColumnOrigin columnOrigin =
+            ExprNodeDescUtils.findColumnOrigin(exprNodeDesc, ctx.generator);
+
+    if (columnOrigin != null) {
+      // get both tableAlias and column name from columnOrigin
+      assert columnOrigin.op instanceof TableScanOperator;
+      TableScanOperator ts = (TableScanOperator) columnOrigin.op;
+      tabAlias.append(ts.getConf().getAlias());
+      colName.append(
+              ExprNodeDescUtils.getColumnExpr(columnOrigin.col).getColumn());
+      return true;
+    }
+
     Operator<? extends OperatorDesc> parentOfRS = ctx.generator.getParentOperators().get(0);
     if (!(parentOfRS instanceof SelectOperator)) {
       colName.append(internalColName.toString());
@@ -324,6 +341,37 @@ public class DynamicPartitionPruningOptimization implements NodeProcessor {
     return true;
   }
 
+  // Handle hint based semijoin
+  private boolean processSemiJoinHints(
+          ParseContext pCtx, DynamicListContext ctx,
+          Map<String, List<SemiJoinHint>> hints, String tableAlias,
+          String internalColName, String colName, TableScanOperator ts,
+          String keyBaseAlias) throws SemanticException {
+    if (hints.size() == 0) {
+      return false;
+    }
+
+    List<SemiJoinHint> hintList = hints.get(tableAlias);
+    if (hintList == null) {
+      return false;
+    }
+
+    // Iterate through the list
+    for (SemiJoinHint sjHint : hintList) {
+      if (!colName.equals(sjHint.getColName())) {
+        continue;
+      }
+      // match!
+      LOG.info("Creating runtime filter due to user hint: column = " + colName);
+      if (generateSemiJoinOperatorPlan(ctx, pCtx, ts, keyBaseAlias,
+              internalColName, colName, sjHint)) {
+        return true;
+      }
+      throw new SemanticException("The user hint to enforce semijoin failed required conditions");
+    }
+    return false;
+  }
+
   private void replaceExprNode(DynamicListContext ctx, FilterDesc desc, ExprNodeDesc node) {
     if (ctx.grandParent == null) {
       desc.setPredicate(node);
@@ -442,12 +490,6 @@ public class DynamicPartitionPruningOptimization implements NodeProcessor {
       TableScanOperator ts, String keyBaseAlias, String internalColName,
       String colName, SemiJoinHint sjHint) throws SemanticException {
 
-    // If semijoin hint is enforced, make sure hint is provided
-    if (parseContext.getConf().getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION_HINT_ONLY)
-            && sjHint == null) {
-        return false;
-    }
-
     // we will put a fork in the plan at the source of the reduce sink
     Operator<? extends OperatorDesc> parentOfRS = ctx.generator.getParentOperators().get(0);
 
@@ -457,23 +499,18 @@ public class DynamicPartitionPruningOptimization implements NodeProcessor {
     assert colName != null;
     // Fetch the TableScan Operator.
     Operator<?> op = parentOfRS;
-    while (!(op == null || op instanceof TableScanOperator)) {
+    while (!(op == null || op instanceof TableScanOperator ||
+             op instanceof ReduceSinkOperator)) {
       op = op.getParentOperators().get(0);
     }
-    assert op != null;
-
-    Table table = ((TableScanOperator) op).getConf().getTableMetadata();
-    if (table.isPartitionKey(colName)) {
-      // The column is partition column, skip the optimization.
-      return false;
-    }
+    Preconditions.checkNotNull(op);
 
-    // If hint is provided and only hinted semijoin optimizations should be
-    // created, then skip other columns on the table
-    if (parseContext.getConf().getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION_HINT_ONLY)
-            && sjHint.getColName() != null &&
-            !internalColName.equals(sjHint.getColName())) {
-      return false;
+    if (op instanceof TableScanOperator) {
+      Table table = ((TableScanOperator) op).getConf().getTableMetadata();
+      if (table.isPartitionKey(colName)) {
+        // The column is partition column, skip the optimization.
+        return false;
+      }
     }
 
     // Check if there already exists a semijoin branch

http://git-wip-us.apache.org/repos/asf/hive/blob/ee91b8ec/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
index 6de4bcd..565fbef 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
@@ -123,7 +123,7 @@ public class ParseContext {
   private Map<ExprNodeDesc, GroupByOperator> colExprToGBMap =
           new HashMap<>();
 
-  private Map<String, SemiJoinHint> semiJoinHints;
+  private Map<String, List<SemiJoinHint>> semiJoinHints;
   public ParseContext() {
   }
 
@@ -674,11 +674,11 @@ public class ParseContext {
     return colExprToGBMap;
   }
 
-  public void setSemiJoinHints(Map<String, SemiJoinHint> hints) {
+  public void setSemiJoinHints(Map<String, List<SemiJoinHint>> hints) {
     this.semiJoinHints = hints;
   }
 
-  public Map<String, SemiJoinHint> getSemiJoinHints() {
+  public Map<String, List<SemiJoinHint>> getSemiJoinHints() {
     return semiJoinHints;
   }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/ee91b8ec/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 5115fc8..ddf74f2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -9018,14 +9018,13 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
   }
 
   /** Parses semjoin hints in the query and returns the table names mapped to filter size, or -1 if not specified.
-   *  Hints can be in 3 formats
+   *  Hints can be in 2 formats
    *  1. TableName, ColumnName, bloom filter entries
-   *  2. TableName, bloom filter entries, and
-   *  3. TableName, ColumnName
+   *  2. TableName, ColumnName
    *  */
-  private Map<String, SemiJoinHint> parseSemiJoinHint(List<ASTNode> hints) throws SemanticException {
+  private Map<String, List<SemiJoinHint>> parseSemiJoinHint(List<ASTNode> hints) throws SemanticException {
     if (hints == null || hints.size() == 0) return null;
-    Map<String, SemiJoinHint> result = null;
+    Map<String, List<SemiJoinHint>> result = null;
     for (ASTNode hintNode : hints) {
       for (Node node : hintNode.getChildren()) {
         ASTNode hint = (ASTNode) node;
@@ -9033,8 +9032,6 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
         if (result == null) {
           result = new HashMap<>();
         }
-        String alias = null;
-        String colName = null;
         Tree args = hint.getChild(1);
         if (args.getChildCount() == 1) {
           String text = args.getChild(0).getText();
@@ -9043,46 +9040,9 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
             return result;
           }
         }
-        for (int i = 0; i < args.getChildCount(); i++) {
-          // We can have table names, column names or sizes here (or incorrect hint if the user is so inclined).
-          String text = args.getChild(i).getText();
-          Integer number = null;
-          try {
-            number = Integer.parseInt(text);
-          } catch (NumberFormatException ex) { // Ignore.
-          }
-          if (number != null) {
-            if (alias == null) {
-              throw new SemanticException("Invalid semijoin hint - arg " + i + " ("
-                      + text + ") is a number but the previous one is not an alias");
-            }
-            if (result.get(alias) != null) {
-              // A hint with same alias already present, throw
-              throw new SemanticException("A hint with alias " + alias +
-                      " already present. Please use unique aliases");
-            }
-            SemiJoinHint sjHint = new SemiJoinHint(alias, colName, number);
-            result.put(alias, sjHint);
-            alias = null;
-            colName = null;
-          } else {
-            if (alias == null) {
-              alias = text;
-            } else if (colName == null) {
-              colName = text;
-            } else {
-              // No bloom filter entries provided.
-              if (result.get(alias) != null) {
-                // A hint with same alias already present, throw
-                throw new SemanticException("A hint with alias " + alias +
-                        " already present. Please use unique aliases");
-              }
-              SemiJoinHint sjHint = new SemiJoinHint(alias, colName, null);
-              result.put(alias, sjHint);
-              alias = text;
-              colName = null;
-            }
-          }
+        int curIdx = 0;
+        while(curIdx < args.getChildCount()) {
+          curIdx = parseSingleSemiJoinHint(args, curIdx, result);
         }
       }
     }
@@ -9092,6 +9052,40 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     return result;
   }
 
+  private int parseSingleSemiJoinHint(Tree args, int curIdx, Map<String, List<SemiJoinHint>> result)
+    throws SemanticException {
+    // Check if there are enough entries in the tree to constitute a hint.
+    int numEntriesLeft = args.getChildCount() - curIdx;
+    if (numEntriesLeft < 2) {
+      throw new SemanticException("User provided only 1 entry for the hint with alias "
+              + args.getChild(curIdx).getText());
+    }
+
+    String alias = args.getChild(curIdx++).getText();
+    // validate
+    if (StringUtils.isNumeric(alias)) {
+      throw new SemanticException("User provided bloom filter entries when alias is expected");
+    }
+
+    String colName = args.getChild(curIdx++).getText();
+    // validate
+    if (StringUtils.isNumeric(colName)) {
+      throw new SemanticException("User provided bloom filter entries when column name is expected");
+    }
+
+    Integer number = null;
+    if (numEntriesLeft > 2) {
+      // Check if there exists bloom filter size entry
+      try {
+        number = Integer.parseInt(args.getChild(curIdx).getText());
+        curIdx++;
+      } catch (NumberFormatException e) { // Ignore
+      }
+    }
+    result.computeIfAbsent(alias, value -> new ArrayList<>()).add(new SemiJoinHint(colName, number));
+    return curIdx;
+  }
+
   /**
    * Merges node to target
    */

http://git-wip-us.apache.org/repos/asf/hive/blob/ee91b8ec/ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinHint.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinHint.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinHint.java
index 1f24e23..f7fd306 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinHint.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinHint.java
@@ -19,20 +19,14 @@
 package org.apache.hadoop.hive.ql.parse;
 
 public class SemiJoinHint {
-  private String tabAlias;
   private String colName;
   private Integer numEntries;
 
-  public SemiJoinHint(String tabAlias, String colName, Integer numEntries) {
-    this.tabAlias = tabAlias;
+  public SemiJoinHint(String colName, Integer numEntries) {
     this.colName = colName;
     this.numEntries = numEntries;
   }
 
-  public String getTabAlias() {
-    return tabAlias;
-  }
-
   public String getColName() {
     return colName;
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/ee91b8ec/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
index 8701b2d..df3de03 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
@@ -854,14 +854,13 @@ public class ExprNodeDescUtils {
   }
 
   private static ExprNodeDesc findParentExpr(ExprNodeColumnDesc col, Operator<?> op) {
-    if (op instanceof ReduceSinkOperator) {
-      return col;
-    }
-
     ExprNodeDesc parentExpr = col;
     Map<String, ExprNodeDesc> mapping = op.getColumnExprMap();
     if (mapping != null) {
       parentExpr = mapping.get(col.getColumn());
+      if (parentExpr == null && op instanceof ReduceSinkOperator) {
+        return col;
+      }
     }
     return parentExpr;
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/ee91b8ec/ql/src/test/queries/clientpositive/semijoin_hint.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/semijoin_hint.q b/ql/src/test/queries/clientpositive/semijoin_hint.q
index 5fbc273..71fa445 100644
--- a/ql/src/test/queries/clientpositive/semijoin_hint.q
+++ b/ql/src/test/queries/clientpositive/semijoin_hint.q
@@ -42,7 +42,7 @@ set hive.cbo.returnpath.hiveop=true;
 -- disabling this test case for returnpath true as the aliases in case of union are mangled due to which hints are not excercised.
 --explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1)
 --        union all
---        select /*+ semi(v, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1);
+--        select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1);
 
 -- Query which creates semijoin
 explain select count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1);
@@ -50,21 +50,18 @@ explain select count(*) from srcpart_date k join srcpart_small v on (k.str = v.k
 explain select /*+ semi(None)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1);
 
 EXPLAIN select  /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring);
-EXPLAIN select  /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring);
+EXPLAIN select  /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring);
 
 explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1);
 
-set hive.tez.dynamic.semijoin.reduction.hint.only=true;
--- This should NOT create a semijoin as the join is on different column
+-- This should NOT create a semijoin
 explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1);
-set hive.tez.dynamic.semijoin.reduction.hint.only=false;
-
 
 set hive.cbo.returnpath.hiveop=false;
 
 explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1)
         union all
-        select /*+ semi(v, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1);
+        select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1);
 
 -- Query which creates semijoin
 explain select count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1);
@@ -72,21 +69,20 @@ explain select count(*) from srcpart_date k join srcpart_small v on (k.str = v.k
 explain select /*+ semi(None)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1);
 
 EXPLAIN select  /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring);
-EXPLAIN select  /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring);
+EXPLAIN select  /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring);
 
 explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1);
 
-set hive.tez.dynamic.semijoin.reduction.hint.only=true;
--- This should NOT create a semijoin as the join is on different column
+-- This should NOT create a semijoin
 explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1);
-set hive.tez.dynamic.semijoin.reduction.hint.only=false;
+
 
 
 set hive.cbo.enable=false;
 
 explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1)
         union all
-        select /*+ semi(v, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1);
+        select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1);
 
 -- Query which creates semijoin
 explain select count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1);
@@ -94,11 +90,10 @@ explain select count(*) from srcpart_date k join srcpart_small v on (k.str = v.k
 explain select /*+ semi(None)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1);
 
 EXPLAIN select  /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring);
-EXPLAIN select  /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring);
+EXPLAIN select  /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring);
 
 explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1);
 
-set hive.tez.dynamic.semijoin.reduction.hint.only=true;
--- This should NOT create a semijoin as the join is on different column
+-- This should NOT create a semijoin
 explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1);
-set hive.tez.dynamic.semijoin.reduction.hint.only=false;
+

http://git-wip-us.apache.org/repos/asf/hive/blob/ee91b8ec/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out
index e3ffcfa..35822f4 100644
--- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out
@@ -1402,10 +1402,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc_int
-                  filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_12_srcpart_date__col1_min) AND DynamicValue(RS_12_srcpart_date__col1_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date__col1_bloom_filter)))) (type: boolean)
+                  filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_12_srcpart_date_value_min) AND DynamicValue(RS_12_srcpart_date_value_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date_value_bloom_filter)))) (type: boolean)
                   Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
-                    predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_12_srcpart_date__col1_min) AND DynamicValue(RS_12_srcpart_date__col1_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date__col1_bloom_filter)))) (type: boolean)
+                    predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_12_srcpart_date_value_min) AND DynamicValue(RS_12_srcpart_date_value_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date_value_bloom_filter)))) (type: boolean)
                     Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: cstring (type: string)

http://git-wip-us.apache.org/repos/asf/hive/blob/ee91b8ec/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out
index 650dc9f..1da1121 100644
--- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out
@@ -107,10 +107,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: tt2
-                  filterExpr: (timestamp_col_18 is not null and decimal1911_col_16 is not null and (timestamp_col_18 BETWEEN DynamicValue(RS_23_t1__col3_min) AND DynamicValue(RS_23_t1__col3_max) and in_bloom_filter(timestamp_col_18, DynamicValue(RS_23_t1__col3_bloom_filter)))) (type: boolean)
+                  filterExpr: (timestamp_col_18 is not null and decimal1911_col_16 is not null and (timestamp_col_18 BETWEEN DynamicValue(RS_23_t1_timestamp_col_9_min) AND DynamicValue(RS_23_t1_timestamp_col_9_max) and in_bloom_filter(timestamp_col_18, DynamicValue(RS_23_t1_timestamp_col_9_bloom_filter)))) (type: boolean)
                   Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                   Filter Operator
-                    predicate: (timestamp_col_18 is not null and decimal1911_col_16 is not null and (timestamp_col_18 BETWEEN DynamicValue(RS_23_t1__col3_min) AND DynamicValue(RS_23_t1__col3_max) and in_bloom_filter(timestamp_col_18, DynamicValue(RS_23_t1__col3_bloom_filter)))) (type: boolean)
+                    predicate: (timestamp_col_18 is not null and decimal1911_col_16 is not null and (timestamp_col_18 BETWEEN DynamicValue(RS_23_t1_timestamp_col_9_min) AND DynamicValue(RS_23_t1_timestamp_col_9_max) and in_bloom_filter(timestamp_col_18, DynamicValue(RS_23_t1_timestamp_col_9_bloom_filter)))) (type: boolean)
                     Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                     Select Operator
                       expressions: decimal1911_col_16 (type: decimal(19,11)), timestamp_col_18 (type: timestamp)

http://git-wip-us.apache.org/repos/asf/hive/blob/ee91b8ec/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out
index 0098b89..18659b3 100644
--- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out
@@ -761,7 +761,7 @@ Stage-0
                   Select Operator [SEL_8] (rows=9174 width=70)
                     Output:["_col0"]
                     Filter Operator [FIL_28] (rows=9174 width=70)
-                      predicate:(cstring is not null and (cstring BETWEEN DynamicValue(RS_12_srcpart_date__col1_min) AND DynamicValue(RS_12_srcpart_date__col1_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date__col1_bloom_filter))))
+                      predicate:(cstring is not null and (cstring BETWEEN DynamicValue(RS_12_srcpart_date_value_min) AND DynamicValue(RS_12_srcpart_date_value_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date_value_bloom_filter))))
                       TableScan [TS_6] (rows=12288 width=70)
                         default@alltypesorc_int,alltypesorc_int,Tbl:COMPLETE,Col:COMPLETE,Output:["cstring"]
                       <-Reducer 5 [BROADCAST_EDGE] llap

http://git-wip-us.apache.org/repos/asf/hive/blob/ee91b8ec/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out b/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out
index 3ffc235..ae9bf9b 100644
--- a/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out
+++ b/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out
@@ -400,23 +400,21 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Map 1 <- Reducer 7 (BROADCAST_EDGE)
-        Map 8 <- Reducer 5 (BROADCAST_EDGE)
+        Map 7 <- Reducer 5 (BROADCAST_EDGE)
         Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
-        Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+        Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
         Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
         Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
-        Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
                 TableScan
                   alias: i
-                  filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_7_srcpart_date_value_min) AND DynamicValue(RS_7_srcpart_date_value_max) and in_bloom_filter(cstring, DynamicValue(RS_7_srcpart_date_value_bloom_filter)))) (type: boolean)
+                  filterExpr: cstring is not null (type: boolean)
                   Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
-                    predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_7_srcpart_date_value_min) AND DynamicValue(RS_7_srcpart_date_value_max) and in_bloom_filter(cstring, DynamicValue(RS_7_srcpart_date_value_bloom_filter)))) (type: boolean)
+                    predicate: cstring is not null (type: boolean)
                     Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: cstring (type: string)
@@ -448,29 +446,16 @@ STAGE PLANS:
                         Map-reduce partition columns: value (type: string)
                         Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: str (type: string)
-                      Select Operator
-                        expressions: value (type: string)
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE
-                        Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=428)
-                          mode: hash
-                          outputColumnNames: _col0, _col1, _col2
-                          Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
-                          Reduce Output Operator
-                            sort order: 
-                            Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
-                            value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
             Execution mode: llap
             LLAP IO: all inputs
-        Map 8 
+        Map 7 
             Map Operator Tree:
                 TableScan
                   alias: v
-                  filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_9_i_str_min) AND DynamicValue(RS_9_i_str_max) and in_bloom_filter(key1, DynamicValue(RS_9_i_str_bloom_filter)))) (type: boolean)
+                  filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_9_srcpart_date_str_min) AND DynamicValue(RS_9_srcpart_date_str_max) and in_bloom_filter(key1, DynamicValue(RS_9_srcpart_date_str_bloom_filter)))) (type: boolean)
                   Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
                   Filter Operator
-                    predicate: (key1 is not null and (key1 BETWEEN DynamicValue(RS_9_i_str_min) AND DynamicValue(RS_9_i_str_max) and in_bloom_filter(key1, DynamicValue(RS_9_i_str_bloom_filter)))) (type: boolean)
+                    predicate: (key1 is not null and (key1 BETWEEN DynamicValue(RS_9_srcpart_date_str_min) AND DynamicValue(RS_9_srcpart_date_str_max) and in_bloom_filter(key1, DynamicValue(RS_9_srcpart_date_str_bloom_filter)))) (type: boolean)
                     Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
                     Select Operator
                       expressions: key1 (type: string)
@@ -504,7 +489,7 @@ STAGE PLANS:
                   outputColumnNames: _col0
                   Statistics: Num rows: 3281 Data size: 285447 Basic stats: COMPLETE Column stats: COMPLETE
                   Group By Operator
-                    aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=410)
+                    aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=5000)
                     mode: hash
                     outputColumnNames: _col0, _col1, _col2
                     Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
@@ -552,19 +537,7 @@ STAGE PLANS:
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=410)
-                mode: final
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
-                Reduce Output Operator
-                  sort order: 
-                  Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
-                  value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
-        Reducer 7 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=428)
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=5000)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
@@ -579,9 +552,9 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: EXPLAIN select  /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring)
+PREHOOK: query: EXPLAIN select  /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring)
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select  /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring)
+POSTHOOK: query: EXPLAIN select  /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -962,11 +935,11 @@ STAGE PLANS:
 
 PREHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1)
         union all
-        select /*+ semi(v, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1)
+        select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1)
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1)
         union all
-        select /*+ semi(v, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1)
+        select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -1430,23 +1403,21 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Map 1 <- Reducer 6 (BROADCAST_EDGE)
-        Map 5 <- Reducer 8 (BROADCAST_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+        Map 7 <- Reducer 5 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
         Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
         Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
-        Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE)
-        Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE)
+        Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
                 TableScan
                   alias: i
-                  filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_10_srcpart_date_value_min) AND DynamicValue(RS_10_srcpart_date_value_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_date_value_bloom_filter)))) (type: boolean)
+                  filterExpr: cstring is not null (type: boolean)
                   Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
-                    predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_10_srcpart_date_value_min) AND DynamicValue(RS_10_srcpart_date_value_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_date_value_bloom_filter)))) (type: boolean)
+                    predicate: cstring is not null (type: boolean)
                     Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: cstring (type: string)
@@ -1459,14 +1430,14 @@ STAGE PLANS:
                         Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: all inputs
-        Map 5 
+        Map 6 
             Map Operator Tree:
                 TableScan
                   alias: srcpart_date
-                  filterExpr: (str is not null and value is not null and (str BETWEEN DynamicValue(RS_13_v_key1_min) AND DynamicValue(RS_13_v_key1_max) and in_bloom_filter(str, DynamicValue(RS_13_v_key1_bloom_filter)))) (type: boolean)
+                  filterExpr: (str is not null and value is not null) (type: boolean)
                   Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
-                    predicate: (str is not null and value is not null and (str BETWEEN DynamicValue(RS_13_v_key1_min) AND DynamicValue(RS_13_v_key1_max) and in_bloom_filter(str, DynamicValue(RS_13_v_key1_bloom_filter)))) (type: boolean)
+                    predicate: (str is not null and value is not null) (type: boolean)
                     Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: str (type: string), value (type: string)
@@ -1478,29 +1449,16 @@ STAGE PLANS:
                         Map-reduce partition columns: _col1 (type: string)
                         Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col0 (type: string)
-                      Select Operator
-                        expressions: _col1 (type: string)
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE
-                        Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=428)
-                          mode: hash
-                          outputColumnNames: _col0, _col1, _col2
-                          Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
-                          Reduce Output Operator
-                            sort order: 
-                            Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
-                            value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
             Execution mode: llap
             LLAP IO: all inputs
         Map 7 
             Map Operator Tree:
                 TableScan
                   alias: v
-                  filterExpr: key1 is not null (type: boolean)
+                  filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_12_srcpart_date_str_min) AND DynamicValue(RS_12_srcpart_date_str_max) and in_bloom_filter(key1, DynamicValue(RS_12_srcpart_date_str_bloom_filter)))) (type: boolean)
                   Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
                   Filter Operator
-                    predicate: key1 is not null (type: boolean)
+                    predicate: (key1 is not null and (key1 BETWEEN DynamicValue(RS_12_srcpart_date_str_min) AND DynamicValue(RS_12_srcpart_date_str_max) and in_bloom_filter(key1, DynamicValue(RS_12_srcpart_date_str_bloom_filter)))) (type: boolean)
                     Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
                     Select Operator
                       expressions: key1 (type: string)
@@ -1511,19 +1469,6 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
                         Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
-                      Select Operator
-                        expressions: _col0 (type: string)
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
-                        Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=410)
-                          mode: hash
-                          outputColumnNames: _col0, _col1, _col2
-                          Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
-                          Reduce Output Operator
-                            sort order: 
-                            Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
-                            value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
             Execution mode: llap
             LLAP IO: all inputs
         Reducer 2 
@@ -1542,6 +1487,19 @@ STAGE PLANS:
                   sort order: +
                   Map-reduce partition columns: _col1 (type: string)
                   Statistics: Num rows: 3281 Data size: 285447 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: string)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 3281 Data size: 285447 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=5000)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
         Reducer 3 
             Execution mode: llap
             Reduce Operator Tree:
@@ -1576,11 +1534,11 @@ STAGE PLANS:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 6 
+        Reducer 5 
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=428)
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=5000)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
@@ -1588,18 +1546,6 @@ STAGE PLANS:
                   sort order: 
                   Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
-        Reducer 8 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=410)
-                mode: final
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
-                Reduce Output Operator
-                  sort order: 
-                  Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
-                  value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
 
   Stage: Stage-0
     Fetch Operator
@@ -1607,9 +1553,9 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: EXPLAIN select  /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring)
+PREHOOK: query: EXPLAIN select  /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring)
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select  /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring)
+POSTHOOK: query: EXPLAIN select  /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -1984,11 +1930,11 @@ STAGE PLANS:
 
 PREHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1)
         union all
-        select /*+ semi(v, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1)
+        select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1)
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1)
         union all
-        select /*+ semi(v, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1)
+        select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -2420,13 +2366,11 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Map 7 <- Reducer 6 (BROADCAST_EDGE)
-        Map 8 <- Reducer 5 (BROADCAST_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
-        Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+        Map 6 <- Reducer 5 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
+        Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
         Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
-        Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
-        Reducer 6 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+        Reducer 5 <- Map 1 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -2459,7 +2403,7 @@ STAGE PLANS:
                           value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
             Execution mode: llap
             LLAP IO: all inputs
-        Map 7 
+        Map 6 
             Map Operator Tree:
                 TableScan
                   alias: v
@@ -2475,14 +2419,14 @@ STAGE PLANS:
                       Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
             Execution mode: llap
             LLAP IO: all inputs
-        Map 8 
+        Map 7 
             Map Operator Tree:
                 TableScan
                   alias: i
-                  filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_9_srcpart_date__col1_min) AND DynamicValue(RS_9_srcpart_date__col1_max) and in_bloom_filter(cstring, DynamicValue(RS_9_srcpart_date__col1_bloom_filter)))) (type: boolean)
+                  filterExpr: cstring is not null (type: boolean)
                   Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
-                    predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_9_srcpart_date__col1_min) AND DynamicValue(RS_9_srcpart_date__col1_max) and in_bloom_filter(cstring, DynamicValue(RS_9_srcpart_date__col1_bloom_filter)))) (type: boolean)
+                    predicate: cstring is not null (type: boolean)
                     Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: cstring (type: string)
@@ -2507,19 +2451,6 @@ STAGE PLANS:
                   sort order: +
                   Map-reduce partition columns: _col1 (type: string)
                   Statistics: Num rows: 9756 Data size: 887796 Basic stats: COMPLETE Column stats: PARTIAL
-                Select Operator
-                  expressions: _col1 (type: string)
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 9756 Data size: 887796 Basic stats: COMPLETE Column stats: PARTIAL
-                  Group By Operator
-                    aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=428)
-                    mode: hash
-                    outputColumnNames: _col0, _col1, _col2
-                    Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
-                      value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
         Reducer 3 
             Execution mode: llap
             Reduce Operator Tree:
@@ -2558,18 +2489,6 @@ STAGE PLANS:
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=428)
-                mode: final
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
-                Reduce Output Operator
-                  sort order: 
-                  Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
-                  value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
-        Reducer 6 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Group By Operator
                 aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=5000)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
@@ -2585,9 +2504,9 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: EXPLAIN select  /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring)
+PREHOOK: query: EXPLAIN select  /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring)
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select  /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring)
+POSTHOOK: query: EXPLAIN select  /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage