Posted to commits@hive.apache.org by vg...@apache.org on 2017/11/07 06:27:36 UTC

[17/17] hive git commit: HIVE-17767 Rewrite correlated EXISTS/IN subqueries into LEFT SEMI JOIN (Vineet Garg, reviewed by Ashutosh Chauhan)

HIVE-17767 Rewrite correlated EXISTS/IN subqueries into LEFT SEMI JOIN (Vineet Garg, reviewed by Ashutosh Chauhan)
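
For illustration, this is the shape of query the rewrite targets. A minimal HiveQL sketch (not part of the original commit message), assuming the standard src test table used by the q-files in this patch: before this change a correlated EXISTS such as the first query was planned as an inner join against a deduplicated (grouped) subquery; with this change it is planned as a LEFT SEMI JOIN, roughly the hand-written form in the second query.

    -- correlated EXISTS, now planned as a left semi join
    explain
    select b.key, b.value
    from src b
    where exists (select 1 from src a where a.value = b.value and a.key > '9');

    -- roughly the equivalent hand-written semi join
    explain
    select b.key, b.value
    from src b
    left semi join (select value from src where key > '9') a on a.value = b.value;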


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/aee0eaa0
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/aee0eaa0
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/aee0eaa0

Branch: refs/heads/master
Commit: aee0eaa032e94f4562c1f07940e4cb391c9bcafd
Parents: 7195aee
Author: Vineet Garg <vg...@apache.com>
Authored: Mon Nov 6 22:26:50 2017 -0800
Committer: Vineet Garg <vg...@apache.com>
Committed: Mon Nov 6 22:26:50 2017 -0800

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |    3 +-
 .../calcite/HiveSubQRemoveRelBuilder.java       |   28 +-
 .../calcite/rules/HiveRelDecorrelator.java      |  385 ++-
 .../calcite/rules/HiveSubQueryRemoveRule.java   |   20 +-
 .../translator/PlanModifierForASTConv.java      |    5 +-
 .../hadoop/hive/ql/parse/CalcitePlanner.java    |    5 +-
 .../queries/clientpositive/subquery_exists.q    |    6 +-
 .../test/queries/clientpositive/subquery_in.q   |   50 +-
 .../clientpositive/constprog_partitioner.q.out  |    6 +-
 .../llap/constprog_semijoin.q.out               |   40 +-
 .../llap/dynamic_semijoin_reduction_2.q.out     |   90 +-
 .../clientpositive/llap/explainuser_1.q.out     |  140 +-
 .../clientpositive/llap/subquery_exists.q.out   |  309 +-
 .../clientpositive/llap/subquery_in.q.out       | 1528 +++++++---
 .../llap/subquery_in_having.q.out               | 2079 +++++++++++++
 .../clientpositive/llap/subquery_multi.q.out    |  627 ++--
 .../clientpositive/llap/subquery_notin.q.out    |  170 +-
 .../clientpositive/llap/subquery_scalar.q.out   |  103 +-
 .../clientpositive/llap/subquery_select.q.out   |  162 +-
 .../clientpositive/llap/subquery_views.q.out    |   82 +-
 .../llap/vector_mapjoin_reduce.q.out            |   29 +-
 .../results/clientpositive/masking_12.q.out     |   43 +-
 .../test/results/clientpositive/masking_3.q.out |  237 +-
 .../test/results/clientpositive/masking_4.q.out |   43 +-
 .../clientpositive/perf/spark/query10.q.out     |  176 +-
 .../clientpositive/perf/spark/query16.q.out     |  234 +-
 .../clientpositive/perf/spark/query35.q.out     |  224 +-
 .../clientpositive/perf/spark/query69.q.out     |  155 +-
 .../clientpositive/perf/spark/query94.q.out     |  234 +-
 .../clientpositive/perf/tez/query10.q.out       |  272 +-
 .../clientpositive/perf/tez/query14.q.out       |    4 +-
 .../clientpositive/perf/tez/query16.q.out       |  255 +-
 .../clientpositive/perf/tez/query23.q.out       |    2 +-
 .../clientpositive/perf/tez/query35.q.out       |  274 +-
 .../clientpositive/perf/tez/query69.q.out       |  248 +-
 .../clientpositive/perf/tez/query94.q.out       |  255 +-
 .../test/results/clientpositive/semijoin5.q.out |   63 +-
 .../spark/constprog_partitioner.q.out           |    6 +-
 .../spark/constprog_semijoin.q.out              |   57 +-
 .../spark/spark_explainuser_1.q.out             |  136 +-
 .../clientpositive/spark/subquery_exists.q.out  |  317 +-
 .../clientpositive/spark/subquery_in.q.out      | 1571 ++++++----
 .../clientpositive/spark/subquery_multi.q.out   |  681 ++---
 .../clientpositive/spark/subquery_notin.q.out   |  136 +-
 .../clientpositive/spark/subquery_scalar.q.out  |  116 +-
 .../clientpositive/spark/subquery_select.q.out  |  141 +-
 .../clientpositive/spark/subquery_views.q.out   |   36 +-
 .../spark/vector_mapjoin_reduce.q.out           |   15 +-
 .../clientpositive/subquery_exists.q.out        |  347 +--
 .../clientpositive/subquery_exists_having.q.out |   38 +-
 .../clientpositive/subquery_in_having.q.out     | 2770 ------------------
 .../clientpositive/subquery_notexists.q.out     |   32 +-
 .../subquery_notexists_having.q.out             |   32 +-
 .../clientpositive/subquery_notin_having.q.out  |   26 +-
 .../subquery_unqualcolumnrefs.q.out             |  197 +-
 .../clientpositive/vector_mapjoin_reduce.q.out  |  444 ---
 56 files changed, 7625 insertions(+), 8059 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/aee0eaa0/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 9b0bace..46abf8a 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -353,7 +353,6 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
   vector_left_outer_join.q,\
   vector_left_outer_join2.q,\
   vector_leftsemi_mapjoin.q,\
-  vector_mapjoin_reduce.q,\
   vector_mr_diff_schema_alias.q,\
   vector_multi_insert.q,\
   vector_null_projection.q,\
@@ -621,6 +620,7 @@ minillaplocal.query.files=\
   special_character_in_tabnames_1.q,\
   sqlmerge.q,\
   stats_based_fetch_decision.q,\
+  subquery_in_having.q,\
   subquery_notin.q,\
   subquery_nested_subquery.q, \
   subquery_select.q, \
@@ -680,6 +680,7 @@ minillaplocal.query.files=\
   vector_join30.q,\
   vector_join_filters.q,\
   vector_leftsemi_mapjoin.q,\
+  vector_mapjoin_reduce.q,\
   vector_number_compare_projection.q,\
   vector_partitioned_date_time.q,\
   vector_ptf_part_simple.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/aee0eaa0/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java
index 3a1897f..c6a5ce2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java
@@ -1109,10 +1109,8 @@ public class HiveSubQRemoveRelBuilder {
     return getRexBuilder().makeFieldAccess(e, name, false);
   }
 
-  /** Creates a {@link org.apache.calcite.rel.core.Join} with correlating
-   * variables. */
   public HiveSubQRemoveRelBuilder join(JoinRelType joinType, RexNode condition,
-                         Set<CorrelationId> variablesSet) {
+                                       Set<CorrelationId> variablesSet, boolean createSemiJoin) {
     Frame right = stack.pop();
     final Frame left = stack.pop();
     final RelNode join;
@@ -1121,10 +1119,10 @@ public class HiveSubQRemoveRelBuilder {
     if (correlate) {
       final CorrelationId id = Iterables.getOnlyElement(variablesSet);
       final ImmutableBitSet requiredColumns =
-              RelOptUtil.correlationColumns(id, right.rel);
+          RelOptUtil.correlationColumns(id, right.rel);
       if (!RelOptUtil.notContainsCorrelation(left.rel, id, Litmus.IGNORE)) {
         throw new IllegalArgumentException("variable " + id
-                + " must not be used by left input to correlation");
+            + " must not be used by left input to correlation");
       }
       switch (joinType) {
         case LEFT:
@@ -1138,11 +1136,18 @@ public class HiveSubQRemoveRelBuilder {
         default:
           postCondition = condition;
       }
-      join = correlateFactory.createCorrelate(left.rel, right.rel, id,
-              requiredColumns, SemiJoinType.of(joinType));
+      if(createSemiJoin) {
+        join = correlateFactory.createCorrelate(left.rel, right.rel, id,
+            requiredColumns, SemiJoinType.SEMI);
+      }
+      else {
+        join = correlateFactory.createCorrelate(left.rel, right.rel, id,
+            requiredColumns, SemiJoinType.of(joinType));
+
+      }
     } else {
       join = joinFactory.createJoin(left.rel, right.rel, condition,
-              variablesSet, joinType, false);
+          variablesSet, joinType, false);
     }
     final List<Pair<String, RelDataType>> pairs = new ArrayList<>();
     pairs.addAll(left.right);
@@ -1152,6 +1157,13 @@ public class HiveSubQRemoveRelBuilder {
     return this;
   }
 
+  /** Creates a {@link org.apache.calcite.rel.core.Join} with correlating
+   * variables. */
+  public HiveSubQRemoveRelBuilder join(JoinRelType joinType, RexNode condition,
+                         Set<CorrelationId> variablesSet) {
+    return join(joinType, condition, variablesSet, false) ;
+  }
+
   /** Creates a {@link org.apache.calcite.rel.core.Join} using USING syntax.
    *
    * <p>For each of the field names, both left and right inputs must have a

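A note on the new join(joinType, condition, variablesSet, createSemiJoin) overload above (an illustrative sketch, not part of the patch): HiveSubQueryRemoveRule, changed further down in this patch, now passes createSemiJoin=true under TRUE logic, which takes effect when the join is correlated, so the correlate is created with SemiJoinType.SEMI and later turned into a HiveSemiJoin by the decorrelator. A correlated IN that exercises this path, assuming the standard src test table:

    explain
    select *
    from src b
    where b.key in (select a.key from src a where a.value = b.value);
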
http://git-wip-us.apache.org/repos/asf/hive/blob/aee0eaa0/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java
index 62125f0..d1fe49c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java
@@ -61,8 +61,10 @@ import org.apache.calcite.rel.core.Values;
 import org.apache.calcite.rel.logical.LogicalAggregate;
 import org.apache.calcite.rel.logical.LogicalCorrelate;
 import org.apache.calcite.rel.logical.LogicalFilter;
+import org.apache.calcite.rel.logical.LogicalIntersect;
 import org.apache.calcite.rel.logical.LogicalJoin;
 import org.apache.calcite.rel.logical.LogicalProject;
+import org.apache.calcite.rel.logical.LogicalUnion;
 import org.apache.calcite.rel.metadata.RelMdUtil;
 import org.apache.calcite.rel.metadata.RelMetadataQuery;
 import org.apache.calcite.rel.rules.FilterJoinRule;
@@ -77,10 +79,12 @@ import org.apache.calcite.rex.RexFieldAccess;
 import org.apache.calcite.rex.RexInputRef;
 import org.apache.calcite.rex.RexLiteral;
 import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexOver;
 import org.apache.calcite.rex.RexShuttle;
 import org.apache.calcite.rex.RexSubQuery;
 import org.apache.calcite.rex.RexUtil;
 import org.apache.calcite.rex.RexVisitorImpl;
+import org.apache.calcite.sql.SemiJoinType;
 import org.apache.calcite.sql.SqlFunction;
 import org.apache.calcite.sql.SqlKind;
 import org.apache.calcite.sql.SqlOperator;
@@ -92,6 +96,7 @@ import org.apache.calcite.tools.RelBuilder;
 import org.apache.calcite.util.Bug;
 import org.apache.calcite.util.Holder;
 import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.ImmutableIntList;
 import org.apache.calcite.util.Litmus;
 import org.apache.calcite.util.Pair;
 import org.apache.calcite.util.ReflectUtil;
@@ -99,12 +104,17 @@ import org.apache.calcite.util.ReflectiveVisitor;
 import org.apache.calcite.util.Stacks;
 import org.apache.calcite.util.Util;
 import org.apache.calcite.util.mapping.Mappings;
+import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelShuttleImpl;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIntersect;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -122,6 +132,8 @@ import com.google.common.collect.Multimaps;
 import com.google.common.collect.Sets;
 import com.google.common.collect.SortedSetMultimap;
 
+import java.util.Stack;
+
 /**
  * NOTE: this whole logic is replicated from Calcite's RelDecorrelator
  *  and is exteneded to make it suitable for HIVE
@@ -176,6 +188,8 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
 
   private final HashSet<LogicalCorrelate> generatedCorRels = Sets.newHashSet();
 
+  private Stack valueGen = new Stack();
+
   //~ Constructors -----------------------------------------------------------
 
   private HiveRelDecorrelator (
@@ -262,6 +276,8 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
       return planner2.findBestExp();
     }
 
+    assert(valueGen.isEmpty());
+
     return root;
   }
 
@@ -319,8 +335,14 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
     return planner.findBestExp();
   }
 
+  protected RexNode decorrelateExpr(RexNode exp, boolean valueGenerator) {
+    DecorrelateRexShuttle shuttle = new DecorrelateRexShuttle();
+    shuttle.setValueGenerator(valueGenerator);
+    return exp.accept(shuttle);
+  }
   protected RexNode decorrelateExpr(RexNode exp) {
     DecorrelateRexShuttle shuttle = new DecorrelateRexShuttle();
+    shuttle.setValueGenerator(true);
     return exp.accept(shuttle);
   }
 
@@ -1106,7 +1128,11 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
         try {
           findCorrelationEquivalent(correlation, ((Filter) rel).getCondition());
         } catch (Util.FoundOne e) {
-          map.put(def, (Integer) e.getNode());
+          // we need to keep the predicate kind (e.g. EQUALS or NOT_EQUALS)
+          // so that the appropriate join predicate is generated later,
+          // when the LogicalCorrelate is decorrelated
+          def.setPredicateKind((SqlKind)((Pair)e.getNode()).getValue());
+          map.put(def, (Integer)((Pair) e.getNode()).getKey());
         }
       }
       // If all correlation variables are now satisfied, skip creating a value
@@ -1145,16 +1171,23 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
   private void findCorrelationEquivalent(CorRef correlation, RexNode e)
           throws Util.FoundOne {
     switch (e.getKind()) {
-      case EQUALS:
+    // TODO: for now only EQUALS and NOT_EQUALS correlated predicates are optimized;
+    // optimize the rest of the predicates
+    case NOT_EQUALS:
+      if((boolean)valueGen.peek()) {
+        // we will need a value generator
+        break;
+      }
+    case EQUALS:
         final RexCall call = (RexCall) e;
         final List<RexNode> operands = call.getOperands();
         if (references(operands.get(0), correlation)
                 && operands.get(1) instanceof RexInputRef) {
-          throw new Util.FoundOne(((RexInputRef) operands.get(1)).getIndex());
+          throw new Util.FoundOne(Pair.of(((RexInputRef) operands.get(1)).getIndex(), e.getKind()));
         }
         if (references(operands.get(1), correlation)
                 && operands.get(0) instanceof RexInputRef) {
-          throw new Util.FoundOne(((RexInputRef) operands.get(0)).getIndex());
+          throw new Util.FoundOne(Pair.of(((RexInputRef) operands.get(0)).getIndex(), e.getKind()));
         }
         break;
       case AND:
@@ -1223,17 +1256,38 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
         return null;
       }
 
+      Frame oldInputFrame = frame;
       // If this LogicalFilter has correlated reference, create value generator
       // and produce the correlated variables in the new output.
       if (cm.mapRefRelToCorRef.containsKey(rel)) {
         frame = decorrelateInputWithValueGenerator(rel);
       }
 
-      // Replace the filter expression to reference output of the join
-      // Map filter to the new filter over join
-      relBuilder.push(frame.r).filter(
-          simplifyComparison(decorrelateExpr(rel.getCondition())));
+      boolean valueGenerator = true;
+      if(frame.r == oldInputFrame.r) {
+        // this means the correlated value generator wasn't generated
+        valueGenerator = false;
+      }
 
+      if(oldInput instanceof LogicalCorrelate && ((LogicalCorrelate) oldInput).getJoinType() == SemiJoinType.SEMI
+          &&  !cm.mapRefRelToCorRef.containsKey(rel)) {
+        // this condition needs to be pushed into the semi-join since it
+        // corresponds to IN
+        HiveSemiJoin join = ((HiveSemiJoin)frame.r);
+        final List<RexNode> conditions = new ArrayList<>();
+        RexNode joinCond = join.getCondition();
+        conditions.add(joinCond);
+        conditions.add(decorrelateExpr(rel.getCondition(), valueGenerator));
+        final RexNode condition =
+            RexUtil.composeConjunction(rexBuilder, conditions, false);
+        RelNode newRel = HiveSemiJoin.getSemiJoin(frame.r.getCluster(), frame.r.getTraitSet(), join.getLeft(), join.getRight(),
+            condition,join.getLeftKeys(), join.getRightKeys());
+        return register(rel, newRel, frame.oldToNewOutputs, frame.corDefOutputs);
+      }
+      // Replace the filter expression to reference output of the join
+        // Map filter to the new filter over join
+        relBuilder.push(frame.r).filter(
+            (decorrelateExpr(rel.getCondition(), valueGenerator)));
       // Filter does not change the input ordering.
       // Filter rel does not permute the input.
       // All corvars produced by filter will have the same output positions in the
@@ -1243,39 +1297,6 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
     }
   }
 
-  private RexNode simplifyComparison(RexNode op) {
-    switch(op.getKind()) {
-    case EQUALS:
-    case GREATER_THAN:
-    case GREATER_THAN_OR_EQUAL:
-    case LESS_THAN:
-    case LESS_THAN_OR_EQUAL:
-    case NOT_EQUALS:
-      RexCall e = (RexCall) op;
-      final List<RexNode> operands = new ArrayList<>(e.operands);
-
-      // Simplify "x <op> x"
-      final RexNode o0 = operands.get(0);
-      final RexNode o1 = operands.get(1);
-      // this should only be called when we are creating filter (decorrelate filter)
-      // since in that case null/unknown is treated as false we don't care about
-      // nullability of operands and will always rewrite op=op to op is not null
-      if (RexUtil.eq(o0, o1) )
-        switch (e.getKind()) {
-        case EQUALS:
-        case GREATER_THAN_OR_EQUAL:
-        case LESS_THAN_OR_EQUAL:
-          // "x = x" simplifies to "x is not null" (similarly <= and >=)
-          return rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_NULL, o0);
-        default:
-          // "x != x" simplifies to "false" (similarly < and >)
-          return rexBuilder.makeLiteral(false);
-        }
-    }
-    return op;
-  }
-
-
     /**
      * Rewrite LogicalFilter.
      *
@@ -1312,9 +1333,31 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
 
     }
 
+    boolean valueGenerator = true;
+    if(frame.r == oldInput) {
+      // this means the correlated value generator wasn't generated
+      valueGenerator = false;
+    }
+
+    if(oldInput instanceof LogicalCorrelate && ((LogicalCorrelate) oldInput).getJoinType() == SemiJoinType.SEMI
+        &&  !cm.mapRefRelToCorRef.containsKey(rel)) {
+      // this condition needs to be pushed into the semi-join since it
+      // corresponds to IN
+      HiveSemiJoin join = ((HiveSemiJoin)frame.r);
+      final List<RexNode> conditions = new ArrayList<>();
+      RexNode joinCond = join.getCondition();
+      conditions.add(joinCond);
+      conditions.add(decorrelateExpr(rel.getCondition(), valueGenerator));
+      final RexNode condition =
+          RexUtil.composeConjunction(rexBuilder, conditions, false);
+      RelNode newRel = HiveSemiJoin.getSemiJoin(frame.r.getCluster(), frame.r.getTraitSet(), join.getLeft(), join.getRight(),
+          condition,join.getLeftKeys(), join.getRightKeys());
+      return register(rel, newRel, frame.oldToNewOutputs, frame.corDefOutputs);
+    }
+
     // Replace the filter expression to reference output of the join
     // Map filter to the new filter over join
-    relBuilder.push(frame.r).filter(decorrelateExpr(rel.getCondition()));
+    relBuilder.push(frame.r).filter(decorrelateExpr(rel.getCondition(), valueGenerator));
 
 
     // Filter does not change the input ordering.
@@ -1344,6 +1387,9 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
     final RelNode oldLeft = rel.getInput(0);
     final RelNode oldRight = rel.getInput(1);
 
+    boolean mightRequireValueGen = new findIfValueGenRequired().traverse(oldRight);
+    valueGen.push(mightRequireValueGen);
+
     final Frame leftFrame = getInvoke(oldLeft, rel);
     final Frame rightFrame = getInvoke(oldRight, rel);
 
@@ -1380,11 +1426,24 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
       }
       final int newLeftPos = leftFrame.oldToNewOutputs.get(corDef.field);
       final int newRightPos = rightOutput.getValue();
-      conditions.add(
-              rexBuilder.makeCall(SqlStdOperatorTable.EQUALS,
-                      RexInputRef.of(newLeftPos, newLeftOutput),
-                      new RexInputRef(newLeftFieldCount + newRightPos,
-                              newRightOutput.get(newRightPos).getType())));
+      if(corDef.getPredicateKind() == SqlKind.NOT_EQUALS) {
+        conditions.add(
+            rexBuilder.makeCall(SqlStdOperatorTable.NOT_EQUALS,
+                RexInputRef.of(newLeftPos, newLeftOutput),
+                new RexInputRef(newLeftFieldCount + newRightPos,
+                    newRightOutput.get(newRightPos).getType())));
+
+      }
+      else {
+        assert(corDef.getPredicateKind() == null
+          || corDef.getPredicateKind() == SqlKind.EQUALS);
+        conditions.add(
+            rexBuilder.makeCall(SqlStdOperatorTable.EQUALS,
+                RexInputRef.of(newLeftPos, newLeftOutput),
+                new RexInputRef(newLeftFieldCount + newRightPos,
+                    newRightOutput.get(newRightPos).getType())));
+
+      }
 
       // remove this cor var from output position mapping
       corDefOutputs.remove(corDef);
@@ -1408,24 +1467,40 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
     int oldLeftFieldCount = oldLeft.getRowType().getFieldCount();
 
     int oldRightFieldCount = oldRight.getRowType().getFieldCount();
-    assert rel.getRowType().getFieldCount()
-            == oldLeftFieldCount + oldRightFieldCount;
 
     // Left input positions are not changed.
     mapOldToNewOutputs.putAll(leftFrame.oldToNewOutputs);
 
-    // Right input positions are shifted by newLeftFieldCount.
-    for (int i = 0; i < oldRightFieldCount; i++) {
-      mapOldToNewOutputs.put(
-              i + oldLeftFieldCount,
-              rightFrame.oldToNewOutputs.get(i) + newLeftFieldCount);
-    }
 
     final RexNode condition =
-            RexUtil.composeConjunction(rexBuilder, conditions, false);
-    RelNode newJoin =
-            LogicalJoin.create(leftFrame.r, rightFrame.r, condition,
-                    ImmutableSet.<CorrelationId>of(), rel.getJoinType().toJoinType());
+        RexUtil.composeConjunction(rexBuilder, conditions, false);
+    RelNode newJoin = null;
+
+    // this indicates the original query was either a correlated EXISTS or IN
+    if(rel.getJoinType() == SemiJoinType.SEMI) {
+      final List<Integer> leftKeys = new ArrayList<Integer>();
+      final List<Integer> rightKeys = new ArrayList<Integer>();
+
+      RelNode[] inputRels = new RelNode[] { leftFrame.r, rightFrame.r};
+      newJoin = HiveSemiJoin.getSemiJoin(rel.getCluster(), rel.getCluster().traitSetOf(HiveRelNode.CONVENTION),
+          leftFrame.r, rightFrame.r, condition, ImmutableIntList.copyOf(leftKeys),
+          ImmutableIntList.copyOf(rightKeys));
+
+    }
+    else {
+      // Right input positions are shifted by newLeftFieldCount.
+      for (int i = 0; i < oldRightFieldCount; i++) {
+        mapOldToNewOutputs.put(
+            i + oldLeftFieldCount,
+            rightFrame.oldToNewOutputs.get(i) + newLeftFieldCount);
+      }
+
+      newJoin = LogicalJoin.create(leftFrame.r, rightFrame.r, condition,
+          ImmutableSet.<CorrelationId>of(), rel.getJoinType().toJoinType());
+
+    }
+
+    valueGen.pop();
 
     return register(rel, newJoin, mapOldToNewOutputs, corDefOutputs);
   }
@@ -1545,6 +1620,7 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
     assert currentRel != null;
 
     int oldOrdinal = oldInputRef.getIndex();
+    int oldOrdinalNo = oldOrdinal;
     int newOrdinal = 0;
 
     // determine which input rel oldOrdinal references, and adjust
@@ -1563,6 +1639,17 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
       oldOrdinal -= n;
     }
 
+    if(oldInput == null) {
+      if(currentRel.getInputs().size() == 1 && currentRel.getInput(0) instanceof LogicalCorrelate) {
+        final Frame newFrame = map.get(currentRel.getInput(0));
+        if(newFrame.r instanceof HiveSemiJoin) {
+          int oldFieldSize = currentRel.getInput(0).getRowType().getFieldCount();
+          int newOrd = newFrame.r.getRowType().getFieldCount() + oldOrdinalNo - oldFieldSize;
+          return new RexInputRef(newOrd, oldInputRef.getType());
+        }
+      }
+    }
+
     assert oldInput != null;
 
     final Frame frame = map.get(oldInput);
@@ -1819,7 +1906,66 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
 
   /** Shuttle that decorrelates. */
   private class DecorrelateRexShuttle extends RexShuttle {
+    private boolean valueGenerator;
+    public void setValueGenerator(boolean valueGenerator) {
+      this.valueGenerator = valueGenerator;
+    }
+
+    // When no value generator is created, DecorrelateRexShuttle ends up decorrelating an
+    // expression such as cor.col1 <> $4 into $4 <> $4, which would then be simplified
+    // to false. This is wrong and messes up the whole tree. To prevent this, visitCall
+    // is overridden to rewrite/simplify such predicates to IS NOT NULL.
+    // We also need to take care that we do this only for correlated predicates and
+    // not for user-specified explicit predicates.
+    // TODO: This code should be removed once CALCITE-1851 is fixed and
+    // there is support for not-equals.
+    @Override  public RexNode visitCall(final RexCall call) {
+      if(!valueGenerator) {
+        switch (call.getKind()) {
+        case EQUALS:
+        case NOT_EQUALS:
+          final List<RexNode> operands = new ArrayList<>(call.operands);
+          RexNode o0 = operands.get(0);
+          RexNode o1 = operands.get(1);
+          boolean isCorrelated = false;
+          if (o0 instanceof RexFieldAccess && (cm.mapFieldAccessToCorRef.get(o0) != null)) {
+            o0 = decorrFieldAccess((RexFieldAccess) o0);
+            isCorrelated = true;
+
+          }
+          if (o1 instanceof RexFieldAccess && (cm.mapFieldAccessToCorRef.get(o1) != null)) {
+            o1 = decorrFieldAccess((RexFieldAccess) o1);
+            isCorrelated = true;
+          }
+          if (isCorrelated && RexUtil.eq(o0, o1)) {
+            return rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_NULL, o0);
+          }
+
+          final List<RexNode> newOperands = new ArrayList<>();
+          newOperands.add(o0);
+          newOperands.add(o1);
+          boolean[] update = { false };
+          List<RexNode> clonedOperands = visitList(newOperands, update);
+
+          return relBuilder.call(call.getOperator(), clonedOperands);
+        }
+      }
+      return super.visitCall(call);
+    }
+
     @Override public RexNode visitFieldAccess(RexFieldAccess fieldAccess) {
+      return decorrFieldAccess(fieldAccess);
+    }
+
+    @Override public RexNode visitInputRef(RexInputRef inputRef) {
+      final RexInputRef ref = getNewForOldInputRef(inputRef);
+      if (ref.getIndex() == inputRef.getIndex()
+              && ref.getType() == inputRef.getType()) {
+        return inputRef; // re-use old object, to prevent needless expr cloning
+      }
+      return ref;
+    }
+    private RexNode decorrFieldAccess(RexFieldAccess fieldAccess) {
       int newInputOutputOffset = 0;
       for (RelNode input : currentRel.getInputs()) {
         final Frame frame = map.get(input);
@@ -1834,7 +1980,7 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
               // This input rel does produce the cor var referenced.
               // Assume fieldAccess has the correct type info.
               return new RexInputRef(newInputPos + newInputOutputOffset,
-                      frame.r.getRowType().getFieldList().get(newInputPos)
+                  frame.r.getRowType().getFieldList().get(newInputPos)
                       .getType());
             }
           }
@@ -1848,15 +1994,6 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
       }
       return fieldAccess;
     }
-
-    @Override public RexNode visitInputRef(RexInputRef inputRef) {
-      final RexInputRef ref = getNewForOldInputRef(inputRef);
-      if (ref.getIndex() == inputRef.getIndex()
-              && ref.getType() == inputRef.getType()) {
-        return inputRef; // re-use old object, to prevent needless expr cloning
-      }
-      return ref;
-    }
   }
 
   /** Shuttle that removes correlations. */
@@ -2881,10 +3018,12 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
   static class CorDef implements Comparable<CorDef> {
     public final CorrelationId corr;
     public final int field;
+    private SqlKind predicateKind;
 
     CorDef(CorrelationId corr, int field) {
       this.corr = corr;
       this.field = field;
+      this.predicateKind = null;
     }
 
     @Override public String toString() {
@@ -2909,6 +3048,13 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
       }
       return Integer.compare(field, o.field);
     }
+    public SqlKind getPredicateKind() {
+      return predicateKind;
+    }
+    public void setPredicateKind(SqlKind predKind) {
+      this.predicateKind = predKind;
+
+    }
   }
 
   /** A map of the locations of
@@ -2986,6 +3132,107 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
     }
   }
 
+  private static class findIfValueGenRequired extends HiveRelShuttleImpl {
+    private boolean mightRequireValueGen ;
+    findIfValueGenRequired() { this.mightRequireValueGen = true; }
+
+    private boolean hasRexOver(List<RexNode> projects) {
+      for(RexNode expr : projects) {
+        if(expr instanceof  RexOver) {
+          return true;
+        }
+      }
+      return false;
+    }
+    @Override public RelNode visit(HiveJoin rel) {
+      mightRequireValueGen = true;
+      return rel;
+    }
+    public RelNode visit(HiveSortLimit rel) {
+      mightRequireValueGen = true;
+      return rel;
+    }
+    public RelNode visit(HiveUnion rel) {
+      mightRequireValueGen = true;
+      return rel;
+    }
+    public RelNode visit(LogicalUnion rel) {
+      mightRequireValueGen = true;
+      return rel;
+    }
+    public RelNode visit(LogicalIntersect rel) {
+      mightRequireValueGen = true;
+      return rel;
+    }
+
+    public RelNode visit(HiveIntersect rel) {
+      mightRequireValueGen = true;
+      return rel;
+    }
+
+    @Override public RelNode visit(LogicalJoin rel) {
+      mightRequireValueGen = true;
+      return rel;
+    }
+    @Override public RelNode visit(HiveProject rel) {
+      if(!(hasRexOver(((HiveProject)rel).getProjects()))) {
+        mightRequireValueGen = false;
+        return super.visit(rel);
+      }
+      else {
+        mightRequireValueGen = true;
+        return rel;
+      }
+    }
+    @Override public RelNode visit(LogicalProject rel) {
+      if(!(hasRexOver(((LogicalProject)rel).getProjects()))) {
+        mightRequireValueGen = false;
+        return super.visit(rel);
+      }
+      else {
+        mightRequireValueGen = true;
+        return rel;
+      }
+    }
+    @Override public RelNode visit(HiveAggregate rel) {
+      // if there are aggregate functions or grouping sets we will need
+      // a value generator
+      if((((HiveAggregate)rel).getAggCallList().isEmpty() == true
+          && ((HiveAggregate)rel).indicator == false)) {
+        this.mightRequireValueGen = false;
+        return super.visit(rel);
+      }
+      else {
+        // need to reset to true in case previous aggregate/project
+        // has set it to false
+        this.mightRequireValueGen = true;
+        return rel;
+      }
+    }
+    @Override public RelNode visit(LogicalAggregate rel) {
+      if((((LogicalAggregate)rel).getAggCallList().isEmpty() == true
+          && ((LogicalAggregate)rel).indicator == false)) {
+        this.mightRequireValueGen = false;
+        return super.visit(rel);
+      }
+      else {
+        // need to reset to true in case previous aggregate/project
+        // has set it to false
+        this.mightRequireValueGen = true;
+        return rel;
+      }
+    }
+    @Override public RelNode visit(LogicalCorrelate rel) {
+      // this means we are hitting a nested subquery, so we don't
+      // need to go further
+      return rel;
+    }
+
+    public boolean traverse(RelNode root) {
+      root.accept(this);
+      return mightRequireValueGen;
+    }
+  }
   /** Builds a {@link org.apache.calcite.sql2rel.RelDecorrelator.CorelMap}. */
   private static class CorelMapBuilder extends HiveRelShuttleImpl {
     final SortedMap<CorrelationId, RelNode> mapCorToCorRel =

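Two contrasting cases, lifted from the subquery_in.q changes later in this patch, exercise the decorrelator logic above (sketch only; assumes the standard src and part test tables). In the first, the correlated predicate is a NOT_EQUALS over a plain DISTINCT subquery, so findIfValueGenRequired reports that no value generator is needed and the predicate kind is carried into the semi-join condition. In the second, the subquery aggregates, so a join with the outer query is still generated to produce the correlated values.

    -- correlated non-equi predicate: no value generator expected
    explain
    select *
    from src b
    where b.key in (select distinct a.key
                    from src a
                    where b.value <> a.key and a.key > '9');

    -- aggregate in the subquery: still joined with the outer query for correlated values
    explain
    select * from part
    where p_size in (select max(p_size) from part p where p.p_type <> part.p_name);
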
http://git-wip-us.apache.org/repos/asf/hive/blob/aee0eaa0/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java
index 2dca6a2..90aab6e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java
@@ -370,9 +370,23 @@ public class HiveSubQueryRemoveRule extends RelOptRule{
             case TRUE:
                 if (fields.isEmpty()) {
                     builder.project(builder.alias(builder.literal(true), "i" + e.rel.getId()));
-                    builder.aggregate(builder.groupKey(0));
+                    if(!variablesSet.isEmpty() && (e.getKind() == SqlKind.EXISTS || e.getKind() == SqlKind.IN)) {
+                        // avoid adding a group by for correlated IN/EXISTS queries
+                        // since this is being rewritten into a semijoin
+                        break;
+                    }
+                    else {
+                        builder.aggregate(builder.groupKey(0));
+                    }
                 } else {
-                    builder.aggregate(builder.groupKey(fields));
+                    if(!variablesSet.isEmpty() && (e.getKind() == SqlKind.EXISTS || e.getKind() == SqlKind.IN)) {
+                        // avoid adding a group by for correlated IN/EXISTS queries
+                        // since this is being rewritten into a semijoin
+                      break;
+                    }
+                    else {
+                        builder.aggregate(builder.groupKey(fields));
+                    }
                 }
                 break;
             default:
@@ -389,7 +403,7 @@ public class HiveSubQueryRemoveRule extends RelOptRule{
             }
             switch (logic) {
             case TRUE:
-                builder.join(JoinRelType.INNER, builder.and(conditions), variablesSet);
+                builder.join(JoinRelType.INNER, builder.and(conditions), variablesSet, true);
                 return builder.literal(true);
             }
             builder.join(JoinRelType.LEFT, builder.and(conditions), variablesSet);

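Why the group-by can be dropped here (an illustrative sketch, assuming the standard src test table): a left semi join emits each outer row at most once no matter how many inner rows match, so deduplicating the correlated subquery is redundant. The two queries below should therefore return the same rows, with and without the explicit DISTINCT.

    select * from src b
    where b.key in (select distinct a.key from src a where b.value = a.value);

    select * from src b
    where b.key in (select a.key from src a where b.value = a.value);
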
http://git-wip-us.apache.org/repos/asf/hive/blob/aee0eaa0/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java
index 5e8a994..73fdff4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java
@@ -48,6 +48,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelColumnsAlignment;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
@@ -273,10 +274,12 @@ public class PlanModifierForASTConv {
       // But we only need the additional project if the left child
       // is another join too; if it is not, ASTConverter will swap
       // the join inputs, leaving the join operator on the left.
+      // we also do it if the parent is a HiveSemiJoin, since ASTConverter won't
+      // swap the inputs in that case
       // This will help triggering multijoin recognition methods that
       // are embedded in SemanticAnalyzer.
       if (((Join) parent).getRight() == joinNode &&
-            (((Join) parent).getLeft() instanceof Join) ) {
+            (((Join) parent).getLeft() instanceof Join || parent instanceof HiveSemiJoin) ) {
         validParent = false;
       }
     } else if (parent instanceof SetOp) {

http://git-wip-us.apache.org/repos/asf/hive/blob/aee0eaa0/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 80351be..1363bbb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -1788,11 +1788,12 @@ public class CalcitePlanner extends SemanticAnalyzer {
       }
 
       // 5. Push Down Semi Joins
-      perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
+      //TODO: Enable this later
+      /*perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
       basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, SemiJoinJoinTransposeRule.INSTANCE,
           SemiJoinFilterTransposeRule.INSTANCE, SemiJoinProjectTransposeRule.INSTANCE);
       perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
-        "Calcite: Prejoin ordering transformation, Push Down Semi Joins");
+        "Calcite: Prejoin ordering transformation, Push Down Semi Joins"); */
 
       // 6. Apply Partition Pruning
       perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);

http://git-wip-us.apache.org/repos/asf/hive/blob/aee0eaa0/ql/src/test/queries/clientpositive/subquery_exists.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/subquery_exists.q b/ql/src/test/queries/clientpositive/subquery_exists.q
index 19c42f0..08681e5 100644
--- a/ql/src/test/queries/clientpositive/subquery_exists.q
+++ b/ql/src/test/queries/clientpositive/subquery_exists.q
@@ -89,8 +89,8 @@ drop table t;
 drop table if exists tx1;
 create table tx1 (a integer,b integer);
 insert into tx1	values  (1, 1),
-                        (1, 2),
-                        (1, 3);
+                         (1, 2),
+                         (1, 3);
 
 select count(*) as result,3 as expected from tx1 u
     where exists (select * from tx1 v where u.a=v.a and u.b <> v.b);
@@ -108,4 +108,4 @@ insert into t2 values(4,2),(4,3),(4,5);
 explain select * from t1 where t1.i in (select t2.i from t2 where t2.j <> t1.j);
 select * from t1 where t1.i in (select t2.i from t2 where t2.j <> t1.j);
 drop table t1;
-drop table t2;
+drop table t2;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/aee0eaa0/ql/src/test/queries/clientpositive/subquery_in.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/subquery_in.q b/ql/src/test/queries/clientpositive/subquery_in.q
index 4ba170a..33cc2fe 100644
--- a/ql/src/test/queries/clientpositive/subquery_in.q
+++ b/ql/src/test/queries/clientpositive/subquery_in.q
@@ -74,25 +74,47 @@ from part b where b.p_size in
 ;
 
 -- distinct, corr
-explain 
-select * 
-from src b 
+explain
+select *
+from src b
 where b.key in
-        (select distinct a.key 
-         from src a 
+        (select distinct a.key
+         from src a
          where b.value = a.value and a.key > '9'
         )
 ;
 
-select * 
-from src b 
+select *
+from src b
 where b.key in
-        (select distinct a.key 
-         from src a 
+        (select distinct a.key
+         from src a
          where b.value = a.value and a.key > '9'
         )
 ;
 
+-- corr, non-equi predicate; should not need a join with the outer query to generate
+-- correlated values
+explain
+select *
+from src b
+where b.key in
+        (select distinct a.key
+         from src a
+         where b.value <> a.key and a.key > '9'
+        )
+;
+
+select *
+from src b
+where b.key in
+        (select distinct a.key
+         from src a
+         where b.value <> a.key and a.key > '9'
+        )
+;
+
+
 -- non agg, non corr, windowing
 select p_mfgr, p_name, p_size 
 from part 
@@ -267,3 +289,13 @@ select * from t where i IN (select sum(i) from tt where tt.j = t.j);
 
 drop table t;
 drop table tt;
+
+-- since the inner query has an aggregate, it will be joined with the outer query to get all possible correlated values
+explain select * from part where p_size IN (select max(p_size) from part p where p.p_type <> part.p_name);
+select * from part where p_size IN (select max(p_size) from part p where p.p_type <> part.p_name);
+
+-- the inner query has a join, so it should be joined with the outer query to fetch all correlated values
+explain select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name);
+select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name);
+
+

http://git-wip-us.apache.org/repos/asf/hive/blob/aee0eaa0/ql/src/test/results/clientpositive/constprog_partitioner.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/constprog_partitioner.q.out b/ql/src/test/results/clientpositive/constprog_partitioner.q.out
index 87618df..455c630 100644
--- a/ql/src/test/results/clientpositive/constprog_partitioner.q.out
+++ b/ql/src/test/results/clientpositive/constprog_partitioner.q.out
@@ -91,7 +91,7 @@ STAGE PLANS:
             alias: li
             Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: (l_linenumber = 1) (type: boolean)
+              predicate: ((l_linenumber = 1) and l_orderkey is not null) (type: boolean)
               Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int)
@@ -107,10 +107,10 @@ STAGE PLANS:
             alias: lineitem
             Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: ((l_linenumber = l_linenumber) and (l_shipmode = 'AIR')) (type: boolean)
+              predicate: ((l_linenumber = 1) and (l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean)
               Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
               Select Operator
-                expressions: l_orderkey (type: int), l_linenumber (type: int)
+                expressions: l_orderkey (type: int), 1 (type: int)
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/aee0eaa0/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out b/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out
index 998a5df..7a48615 100644
--- a/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out
@@ -119,24 +119,26 @@ Stage-0
         Select Operator [SEL_17] (rows=5 width=232)
           Output:["_col0","_col1","_col2"]
           Merge Join Operator [MERGEJOIN_28] (rows=5 width=232)
-            Conds:RS_14._col0=RS_15._col0(Inner),Output:["_col0","_col4"]
+            Conds:RS_14._col2=RS_15._col0(Left Semi),Output:["_col0","_col4"]
           <-Map 5 [SIMPLE_EDGE] llap
             SHUFFLE [RS_15]
               PartitionCols:_col0
-              Select Operator [SEL_8] (rows=3 width=188)
-                Output:["_col0","_col1"]
-                Filter Operator [FIL_26] (rows=3 width=188)
-                  predicate:id is not null
-                  TableScan [TS_6] (rows=3 width=188)
-                    default@table2,table2,Tbl:COMPLETE,Col:NONE,Output:["id","val2"]
+              Group By Operator [GBY_13] (rows=5 width=4)
+                Output:["_col0"],keys:_col0
+                Select Operator [SEL_8] (rows=5 width=4)
+                  Output:["_col0"]
+                  Filter Operator [FIL_26] (rows=5 width=4)
+                    predicate:id is not null
+                    TableScan [TS_6] (rows=5 width=4)
+                      default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"]
           <-Reducer 2 [SIMPLE_EDGE] llap
             SHUFFLE [RS_14]
-              PartitionCols:_col0
+              PartitionCols:_col2
               Merge Join Operator [MERGEJOIN_27] (rows=5 width=211)
-                Conds:RS_11._col2=RS_12._col0(Left Semi),Output:["_col0"]
+                Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col0","_col2","_col4"]
               <-Map 1 [SIMPLE_EDGE] llap
-                SHUFFLE [RS_11]
-                  PartitionCols:_col2
+                SHUFFLE [RS_9]
+                  PartitionCols:_col0
                   Select Operator [SEL_2] (rows=5 width=192)
                     Output:["_col0","_col2"]
                     Filter Operator [FIL_24] (rows=5 width=192)
@@ -144,16 +146,14 @@ Stage-0
                       TableScan [TS_0] (rows=10 width=192)
                         default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","dimid"]
               <-Map 4 [SIMPLE_EDGE] llap
-                SHUFFLE [RS_12]
+                SHUFFLE [RS_10]
                   PartitionCols:_col0
-                  Group By Operator [GBY_10] (rows=5 width=4)
-                    Output:["_col0"],keys:_col0
-                    Select Operator [SEL_5] (rows=5 width=4)
-                      Output:["_col0"]
-                      Filter Operator [FIL_25] (rows=5 width=4)
-                        predicate:id is not null
-                        TableScan [TS_3] (rows=5 width=4)
-                          default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"]
+                  Select Operator [SEL_5] (rows=3 width=188)
+                    Output:["_col0","_col1"]
+                    Filter Operator [FIL_25] (rows=3 width=188)
+                      predicate:id is not null
+                      TableScan [TS_3] (rows=3 width=188)
+                        default@table2,table2,Tbl:COMPLETE,Col:NONE,Output:["id","val2"]
 
 PREHOOK: query: select table1.id, table1.val, table2.val2 from table1 inner join table2 on table1.val = 't1val01' and table1.id = table2.id left semi join table3 on table1.dimid = table3.id
 PREHOOK: type: QUERY

http://git-wip-us.apache.org/repos/asf/hive/blob/aee0eaa0/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out
index 87e08fb..e2e2ef3 100644
--- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out
@@ -73,14 +73,13 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Map 1 <- Reducer 12 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE)
-        Map 14 <- Reducer 13 (BROADCAST_EDGE)
-        Reducer 10 <- Map 14 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
-        Reducer 11 <- Reducer 10 (SIMPLE_EDGE)
-        Reducer 12 <- Reducer 11 (CUSTOM_SIMPLE_EDGE)
-        Reducer 13 <- Map 9 (CUSTOM_SIMPLE_EDGE)
+        Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE)
+        Map 13 <- Reducer 12 (BROADCAST_EDGE)
+        Reducer 10 <- Map 13 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
+        Reducer 11 <- Reducer 10 (CUSTOM_SIMPLE_EDGE)
+        Reducer 12 <- Map 9 (CUSTOM_SIMPLE_EDGE)
         Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
-        Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
         Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
         Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE)
         Reducer 7 <- Map 5 (CUSTOM_SIMPLE_EDGE)
@@ -91,10 +90,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: t1
-                  filterExpr: (bigint_col_7 is not null and decimal2016_col_26 is not null and tinyint_col_3 is not null and (decimal2016_col_26 BETWEEN DynamicValue(RS_21_t2_decimal2709_col_9_min) AND DynamicValue(RS_21_t2_decimal2709_col_9_max) and in_bloom_filter(decimal2016_col_26, DynamicValue(RS_21_t2_decimal2709_col_9_bloom_filter))) and (tinyint_col_3 BETWEEN DynamicValue(RS_21_t2_tinyint_col_20_min) AND DynamicValue(RS_21_t2_tinyint_col_20_max) and in_bloom_filter(tinyint_col_3, DynamicValue(RS_21_t2_tinyint_col_20_bloom_filter))) and (bigint_col_7 BETWEEN DynamicValue(RS_21_t2_tinyint_col_15_min) AND DynamicValue(RS_21_t2_tinyint_col_15_max) and in_bloom_filter(bigint_col_7, DynamicValue(RS_21_t2_tinyint_col_15_bloom_filter))) and (timestamp_col_9 BETWEEN DynamicValue(RS_24_tt2_timestamp_col_18_min) AND DynamicValue(RS_24_tt2_timestamp_col_18_max) and in_bloom_filter(timestamp_col_9, DynamicValue(RS_24_tt2_timestamp_col_18_bloom_filter)))) (type: boolean)
+                  filterExpr: (bigint_col_7 is not null and decimal2016_col_26 is not null and tinyint_col_3 is not null and timestamp_col_9 is not null and (decimal2016_col_26 BETWEEN DynamicValue(RS_17_t2_decimal2709_col_9_min) AND DynamicValue(RS_17_t2_decimal2709_col_9_max) and in_bloom_filter(decimal2016_col_26, DynamicValue(RS_17_t2_decimal2709_col_9_bloom_filter))) and (tinyint_col_3 BETWEEN DynamicValue(RS_17_t2_tinyint_col_20_min) AND DynamicValue(RS_17_t2_tinyint_col_20_max) and in_bloom_filter(tinyint_col_3, DynamicValue(RS_17_t2_tinyint_col_20_bloom_filter))) and (bigint_col_7 BETWEEN DynamicValue(RS_17_t2_tinyint_col_15_min) AND DynamicValue(RS_17_t2_tinyint_col_15_max) and in_bloom_filter(bigint_col_7, DynamicValue(RS_17_t2_tinyint_col_15_bloom_filter))) and (timestamp_col_9 BETWEEN DynamicValue(RS_22_tt2_timestamp_col_18_min) AND DynamicValue(RS_22_tt2_timestamp_col_18_max) and in_bloom_filter(timestamp_col_9, DynamicValue(RS_22_tt2_timestamp_col_18_bloom_filter)))) (type: boolean)
                   Statistics: Num rows: 1 Data size: 164 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((bigint_col_7 BETWEEN DynamicValue(RS_21_t2_tinyint_col_15_min) AND DynamicValue(RS_21_t2_tinyint_col_15_max) and in_bloom_filter(bigint_col_7, DynamicValue(RS_21_t2_tinyint_col_15_bloom_filter))) and (decimal2016_col_26 BETWEEN DynamicValue(RS_21_t2_decimal2709_col_9_min) AND DynamicValue(RS_21_t2_decimal2709_col_9_max) and in_bloom_filter(decimal2016_col_26, DynamicValue(RS_21_t2_decimal2709_col_9_bloom_filter))) and (timestamp_col_9 BETWEEN DynamicValue(RS_24_tt2_timestamp_col_18_min) AND DynamicValue(RS_24_tt2_timestamp_col_18_max) and in_bloom_filter(timestamp_col_9, DynamicValue(RS_24_tt2_timestamp_col_18_bloom_filter))) and (tinyint_col_3 BETWEEN DynamicValue(RS_21_t2_tinyint_col_20_min) AND DynamicValue(RS_21_t2_tinyint_col_20_max) and in_bloom_filter(tinyint_col_3, DynamicValue(RS_21_t2_tinyint_col_20_bloom_filter))) and bigint_col_7 is not null and decimal2016_col_26 is not null and tinyint_col_3 is not null) (type: boolean)
+                    predicate: ((bigint_col_7 BETWEEN DynamicValue(RS_17_t2_tinyint_col_15_min) AND DynamicValue(RS_17_t2_tinyint_col_15_max) and in_bloom_filter(bigint_col_7, DynamicValue(RS_17_t2_tinyint_col_15_bloom_filter))) and (decimal2016_col_26 BETWEEN DynamicValue(RS_17_t2_decimal2709_col_9_min) AND DynamicValue(RS_17_t2_decimal2709_col_9_max) and in_bloom_filter(decimal2016_col_26, DynamicValue(RS_17_t2_decimal2709_col_9_bloom_filter))) and (timestamp_col_9 BETWEEN DynamicValue(RS_22_tt2_timestamp_col_18_min) AND DynamicValue(RS_22_tt2_timestamp_col_18_max) and in_bloom_filter(timestamp_col_9, DynamicValue(RS_22_tt2_timestamp_col_18_bloom_filter))) and (tinyint_col_3 BETWEEN DynamicValue(RS_17_t2_tinyint_col_20_min) AND DynamicValue(RS_17_t2_tinyint_col_20_max) and in_bloom_filter(tinyint_col_3, DynamicValue(RS_17_t2_tinyint_col_20_bloom_filter))) and bigint_col_7 is not null and decimal2016_col_26 is not null and timestamp_col_9 is not null and tinyint_col_3 is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 164 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: bigint_col_7 (type: bigint), decimal2016_col_26 (type: decimal(20,16)), tinyint_col_3 (type: tinyint), timestamp_col_9 (type: timestamp)
@@ -108,7 +107,7 @@ STAGE PLANS:
                         value expressions: _col3 (type: timestamp)
             Execution mode: llap
             LLAP IO: no inputs
-        Map 14 
+        Map 13 
             Map Operator Tree:
                 TableScan
                   alias: tt2
@@ -133,10 +132,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: t2
-                  filterExpr: (tinyint_col_15 is not null and decimal2709_col_9 is not null and tinyint_col_20 is not null) (type: boolean)
+                  filterExpr: (tinyint_col_15 is not null and decimal2709_col_9 is not null and tinyint_col_20 is not null and smallint_col_19 is not null) (type: boolean)
                   Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (decimal2709_col_9 is not null and tinyint_col_15 is not null and tinyint_col_20 is not null) (type: boolean)
+                    predicate: (decimal2709_col_9 is not null and smallint_col_19 is not null and tinyint_col_15 is not null and tinyint_col_20 is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: tinyint_col_15 (type: tinyint), decimal2709_col_9 (type: decimal(27,9)), tinyint_col_20 (type: tinyint), smallint_col_19 (type: smallint)
@@ -233,47 +232,34 @@ STAGE PLANS:
                   1 _col0 (type: decimal(26,12))
                 outputColumnNames: _col2
                 Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col2 (type: timestamp)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: timestamp)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: timestamp)
-                    Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE
-        Reducer 11 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Group By Operator
-                keys: KEY._col0 (type: timestamp)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: timestamp)
-                  outputColumnNames: _col1
+                  expressions: _col2 (type: timestamp), -92 (type: int)
+                  outputColumnNames: _col0, _col1
                   Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col1 (type: timestamp), -92 (type: int)
-                    sort order: ++
-                    Map-reduce partition columns: _col1 (type: timestamp), -92 (type: int)
+                  Group By Operator
+                    keys: _col0 (type: timestamp), _col1 (type: int)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
                     Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col1 (type: timestamp)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1)
-                      mode: hash
-                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        sort order: 
+                    Reduce Output Operator
+                      key expressions: _col0 (type: timestamp), _col1 (type: int)
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: int)
+                      Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: timestamp)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1)
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2
                         Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: binary)
-        Reducer 12 
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: binary)
+        Reducer 11 
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
@@ -285,7 +271,7 @@ STAGE PLANS:
                   sort order: 
                   Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: binary)
-        Reducer 13 
+        Reducer 12 
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
@@ -318,10 +304,10 @@ STAGE PLANS:
             Reduce Operator Tree:
               Merge Join Operator
                 condition map:
-                     Inner Join 0 to 1
+                     Left Semi Join 0 to 1
                 keys:
                   0 _col3 (type: timestamp), UDFToInteger(_col7) (type: int)
-                  1 _col1 (type: timestamp), -92 (type: int)
+                  1 _col0 (type: timestamp), _col1 (type: int)
                 Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
                   aggregations: count()

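The plan change just above (Inner Join 0 to 1 becoming Left Semi Join 0 to 1, with the subquery side grouped on its own join keys rather than de-duplicated in a separate reducer stage) is the shape now produced for correlated IN predicates. A minimal HiveQL sketch of the idea, using hypothetical tables and columns (t_outer, t_inner, ts, c) rather than the actual tables from the .q tests above:

    -- Illustration only: hypothetical tables/columns, not those used by the tests.
    CREATE TABLE t_outer (ts TIMESTAMP, c INT);
    CREATE TABLE t_inner (ts TIMESTAMP, c INT);

    -- Correlated IN subquery of the general shape exercised above:
    SELECT COUNT(*)
    FROM t_outer o
    WHERE o.ts IN (SELECT i.ts FROM t_inner i WHERE i.c = o.c);

    -- Roughly equivalent form planned directly as a semi join: the IN key plus
    -- the correlated key both become join keys, and the right side only
    -- contributes keys (the plan above also de-duplicates it with a hash
    -- Group By Operator before the join):
    SELECT COUNT(*)
    FROM t_outer o
    LEFT SEMI JOIN t_inner i ON i.ts = o.ts AND i.c = o.c;
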
http://git-wip-us.apache.org/repos/asf/hive/blob/aee0eaa0/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
index 6e55acf..f03a65f 100644
--- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
@@ -1683,11 +1683,11 @@ Stage-0
     Stage-1
       Reducer 2 llap
       File Output Operator [FS_19]
-        Select Operator [SEL_18] (rows=434 width=178)
+        Select Operator [SEL_18] (rows=366 width=178)
           Output:["_col0","_col1"]
-          Filter Operator [FIL_17] (rows=434 width=178)
+          Filter Operator [FIL_17] (rows=366 width=179)
             predicate:_col3 is null
-            Merge Join Operator [MERGEJOIN_22] (rows=500 width=178)
+            Merge Join Operator [MERGEJOIN_22] (rows=500 width=179)
               Conds:RS_14._col1=RS_15._col0(Left Outer),Output:["_col0","_col1","_col3"]
             <-Map 1 [SIMPLE_EDGE] llap
               SHUFFLE [RS_14]
@@ -1699,21 +1699,21 @@ Stage-0
             <-Reducer 3 [SIMPLE_EDGE] llap
               SHUFFLE [RS_15]
                 PartitionCols:_col0
-                Select Operator [SEL_13] (rows=41 width=95)
+                Select Operator [SEL_13] (rows=83 width=95)
                   Output:["_col0","_col1"]
-                  Group By Operator [GBY_12] (rows=41 width=91)
+                  Group By Operator [GBY_12] (rows=83 width=91)
                     Output:["_col0"],keys:_col1
-                    Select Operator [SEL_8] (rows=41 width=178)
+                    Select Operator [SEL_8] (rows=83 width=178)
                       Output:["_col1"]
-                      Group By Operator [GBY_7] (rows=41 width=178)
+                      Group By Operator [GBY_7] (rows=83 width=178)
                         Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
                       <-Map 1 [SIMPLE_EDGE] llap
                         SHUFFLE [RS_6]
                           PartitionCols:_col0
-                          Group By Operator [GBY_5] (rows=41 width=178)
+                          Group By Operator [GBY_5] (rows=83 width=178)
                             Output:["_col0","_col1"],keys:value, key
-                            Filter Operator [FIL_21] (rows=83 width=178)
-                              predicate:((value = value) and (value > 'val_2'))
+                            Filter Operator [FIL_21] (rows=166 width=178)
+                              predicate:(value > 'val_2')
                                Please refer to the previous TableScan [TS_0]
 
 PREHOOK: query: explain select * 
@@ -1747,11 +1747,11 @@ Stage-0
     Stage-1
       Reducer 3 llap
       File Output Operator [FS_18]
-        Select Operator [SEL_17] (rows=234 width=178)
+        Select Operator [SEL_17] (rows=183 width=178)
           Output:["_col0","_col1"]
-          Filter Operator [FIL_16] (rows=234 width=178)
+          Filter Operator [FIL_16] (rows=183 width=179)
             predicate:_col4 is null
-            Merge Join Operator [MERGEJOIN_21] (rows=250 width=178)
+            Merge Join Operator [MERGEJOIN_21] (rows=250 width=179)
               Conds:RS_13._col0, _col1=RS_14._col0, _col1(Left Outer),Output:["_col0","_col1","_col4"]
             <-Reducer 2 [ONE_TO_ONE_EDGE] llap
               FORWARD [RS_13]
@@ -1770,17 +1770,17 @@ Stage-0
             <-Reducer 4 [ONE_TO_ONE_EDGE] llap
               FORWARD [RS_14]
                 PartitionCols:_col0, _col1
-                Select Operator [SEL_12] (rows=20 width=182)
+                Select Operator [SEL_12] (rows=83 width=182)
                   Output:["_col0","_col1","_col2"]
-                  Group By Operator [GBY_11] (rows=20 width=178)
+                  Group By Operator [GBY_11] (rows=83 width=178)
                     Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
                   <-Map 1 [SIMPLE_EDGE] llap
                     SHUFFLE [RS_10]
                       PartitionCols:_col0, _col1
-                      Group By Operator [GBY_9] (rows=20 width=178)
+                      Group By Operator [GBY_9] (rows=83 width=178)
                         Output:["_col0","_col1"],keys:key, value
-                        Filter Operator [FIL_20] (rows=41 width=178)
-                          predicate:((key = key) and (value = value) and (value > 'val_12'))
+                        Filter Operator [FIL_20] (rows=166 width=178)
+                          predicate:((value > 'val_12') and key is not null)
                            Please refer to the previous TableScan [TS_0]
 
 PREHOOK: query: create view cv1 as 
@@ -1821,26 +1821,28 @@ Stage-0
     limit:-1
     Stage-1
       Reducer 2 llap
-      File Output Operator [FS_11]
-        Merge Join Operator [MERGEJOIN_16] (rows=32 width=178)
-          Conds:RS_7._col0, _col1=RS_8._col0, _col1(Left Semi),Output:["_col0","_col1"]
+      File Output Operator [FS_12]
+        Merge Join Operator [MERGEJOIN_17] (rows=133 width=178)
+          Conds:RS_8._col0, _col1=RS_9._col0, _col1(Left Semi),Output:["_col0","_col1"]
         <-Map 1 [SIMPLE_EDGE] llap
-          SHUFFLE [RS_7]
+          SHUFFLE [RS_8]
             PartitionCols:_col0, _col1
-            Select Operator [SEL_1] (rows=500 width=178)
+            Select Operator [SEL_2] (rows=166 width=178)
               Output:["_col0","_col1"]
-              TableScan [TS_0] (rows=500 width=178)
-                default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"],properties:{"insideView":"TRUE"}
+              Filter Operator [FIL_15] (rows=166 width=178)
+                predicate:((value > 'val_9') and key is not null)
+                TableScan [TS_0] (rows=500 width=178)
+                  default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"],properties:{"insideView":"TRUE"}
         <-Map 3 [SIMPLE_EDGE] llap
-          SHUFFLE [RS_8]
+          SHUFFLE [RS_9]
             PartitionCols:_col0, _col1
-            Group By Operator [GBY_6] (rows=20 width=178)
+            Group By Operator [GBY_7] (rows=83 width=178)
               Output:["_col0","_col1"],keys:_col0, _col1
-              Select Operator [SEL_4] (rows=41 width=178)
+              Select Operator [SEL_5] (rows=166 width=178)
                 Output:["_col0","_col1"]
-                Filter Operator [FIL_15] (rows=41 width=178)
-                  predicate:((key = key) and (value = value) and (value > 'val_9'))
-                  TableScan [TS_2] (rows=500 width=178)
+                Filter Operator [FIL_16] (rows=166 width=178)
+                  predicate:((value > 'val_9') and key is not null)
+                  TableScan [TS_3] (rows=500 width=178)
                     default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
 
 PREHOOK: query: explain select * 
@@ -1871,26 +1873,28 @@ Stage-0
     limit:-1
     Stage-1
       Reducer 2 llap
-      File Output Operator [FS_11]
-        Merge Join Operator [MERGEJOIN_16] (rows=32 width=178)
-          Conds:RS_7._col0, _col1=RS_8._col0, _col1(Left Semi),Output:["_col0","_col1"]
+      File Output Operator [FS_12]
+        Merge Join Operator [MERGEJOIN_17] (rows=133 width=178)
+          Conds:RS_8._col0, _col1=RS_9._col0, _col1(Left Semi),Output:["_col0","_col1"]
         <-Map 1 [SIMPLE_EDGE] llap
-          SHUFFLE [RS_7]
+          SHUFFLE [RS_8]
             PartitionCols:_col0, _col1
-            Select Operator [SEL_1] (rows=500 width=178)
+            Select Operator [SEL_2] (rows=166 width=178)
               Output:["_col0","_col1"]
-              TableScan [TS_0] (rows=500 width=178)
-                default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+              Filter Operator [FIL_15] (rows=166 width=178)
+                predicate:((value > 'val_9') and key is not null)
+                TableScan [TS_0] (rows=500 width=178)
+                  default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
         <-Map 3 [SIMPLE_EDGE] llap
-          SHUFFLE [RS_8]
+          SHUFFLE [RS_9]
             PartitionCols:_col0, _col1
-            Group By Operator [GBY_6] (rows=20 width=178)
+            Group By Operator [GBY_7] (rows=83 width=178)
               Output:["_col0","_col1"],keys:_col0, _col1
-              Select Operator [SEL_4] (rows=41 width=178)
+              Select Operator [SEL_5] (rows=166 width=178)
                 Output:["_col0","_col1"]
-                Filter Operator [FIL_15] (rows=41 width=178)
-                  predicate:((key = key) and (value = value) and (value > 'val_9'))
-                  TableScan [TS_2] (rows=500 width=178)
+                Filter Operator [FIL_16] (rows=166 width=178)
+                  predicate:((value > 'val_9') and key is not null)
+                  TableScan [TS_3] (rows=500 width=178)
                     default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
 
 PREHOOK: query: explain select * 
@@ -1958,19 +1962,19 @@ Stage-0
     Stage-1
       Reducer 4 llap
       File Output Operator [FS_22]
-        Select Operator [SEL_21] (rows=7 width=8)
+        Select Operator [SEL_21] (rows=3 width=8)
           Output:["_col0","_col1"]
-          Merge Join Operator [MERGEJOIN_32] (rows=7 width=8)
+          Merge Join Operator [MERGEJOIN_32] (rows=3 width=8)
             Conds:RS_18._col1, _col4=RS_19._col0, _col1(Left Semi),Output:["_col0","_col3"]
           <-Map 6 [SIMPLE_EDGE] llap
             SHUFFLE [RS_19]
               PartitionCols:_col0, _col1
-              Group By Operator [GBY_17] (rows=2 width=8)
+              Group By Operator [GBY_17] (rows=1 width=8)
                 Output:["_col0","_col1"],keys:_col0, _col1
-                Select Operator [SEL_12] (rows=7 width=8)
+                Select Operator [SEL_12] (rows=2 width=8)
                   Output:["_col0","_col1"]
-                  Filter Operator [FIL_30] (rows=7 width=96)
-                    predicate:((l_linenumber = l_linenumber) and (l_shipmode = 'AIR'))
+                  Filter Operator [FIL_30] (rows=2 width=96)
+                    predicate:((l_linenumber = 1) and (l_shipmode = 'AIR') and l_orderkey is not null)
                     TableScan [TS_10] (rows=100 width=96)
                       default@lineitem,lineitem,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey","l_linenumber","l_shipmode"]
           <-Reducer 3 [SIMPLE_EDGE] llap
@@ -1984,7 +1988,7 @@ Stage-0
                   Select Operator [SEL_9] (rows=14 width=16)
                     Output:["_col0","_col1","_col2","_col3"]
                     Filter Operator [FIL_29] (rows=14 width=16)
-                      predicate:((l_linenumber = 1) and l_partkey is not null)
+                      predicate:((l_linenumber = 1) and l_orderkey is not null and l_partkey is not null)
                       TableScan [TS_7] (rows=100 width=16)
                         default@lineitem,li,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey","l_partkey","l_suppkey","l_linenumber"]
               <-Reducer 2 [ONE_TO_ONE_EDGE] llap
@@ -2258,14 +2262,14 @@ Stage-0
       File Output Operator [FS_26]
         Select Operator [SEL_25] (rows=13 width=223)
           Output:["_col0","_col1","_col2"]
-          Filter Operator [FIL_24] (rows=13 width=227)
+          Filter Operator [FIL_24] (rows=13 width=231)
             predicate:(not CASE WHEN ((_col4 = 0)) THEN (false) WHEN (_col4 is null) THEN (false) WHEN (_col8 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (true) ELSE (false) END)
-            Merge Join Operator [MERGEJOIN_32] (rows=26 width=227)
+            Merge Join Operator [MERGEJOIN_32] (rows=26 width=230)
               Conds:RS_21._col0, _col1=RS_22._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5","_col8"]
             <-Reducer 2 [SIMPLE_EDGE] llap
               SHUFFLE [RS_21]
                 PartitionCols:_col0, _col1
-                Merge Join Operator [MERGEJOIN_31] (rows=26 width=226)
+                Merge Join Operator [MERGEJOIN_31] (rows=26 width=229)
                   Conds:RS_18._col1=RS_19._col0(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5"]
                 <-Map 1 [SIMPLE_EDGE] llap
                   SHUFFLE [RS_18]
@@ -2277,36 +2281,36 @@ Stage-0
                 <-Reducer 4 [ONE_TO_ONE_EDGE] llap
                   FORWARD [RS_19]
                     PartitionCols:_col0
-                    Group By Operator [GBY_7] (rows=1 width=114)
+                    Group By Operator [GBY_7] (rows=2 width=114)
                       Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0
                     <-Map 1 [SIMPLE_EDGE] llap
                       SHUFFLE [RS_6]
                         PartitionCols:_col0
-                        Group By Operator [GBY_5] (rows=1 width=114)
+                        Group By Operator [GBY_5] (rows=2 width=114)
                           Output:["_col0","_col1","_col2"],aggregations:["count()","count(p_name)"],keys:p_mfgr
-                          Select Operator [SEL_4] (rows=4 width=223)
+                          Select Operator [SEL_4] (rows=8 width=223)
                             Output:["p_name","p_mfgr"]
-                            Filter Operator [FIL_29] (rows=4 width=223)
-                              predicate:((p_mfgr = p_mfgr) and (p_size < 10))
+                            Filter Operator [FIL_29] (rows=8 width=223)
+                              predicate:((p_size < 10) and p_mfgr is not null)
                                Please refer to the previous TableScan [TS_0]
             <-Reducer 5 [ONE_TO_ONE_EDGE] llap
               FORWARD [RS_22]
                 PartitionCols:_col0, _col1
-                Select Operator [SEL_17] (rows=2 width=223)
+                Select Operator [SEL_17] (rows=4 width=223)
                   Output:["_col0","_col1","_col2"]
-                  Filter Operator [FIL_16] (rows=2 width=219)
+                  Filter Operator [FIL_16] (rows=4 width=219)
                     predicate:_col0 is not null
-                    Group By Operator [GBY_14] (rows=2 width=219)
+                    Group By Operator [GBY_14] (rows=4 width=219)
                       Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
                     <-Map 1 [SIMPLE_EDGE] llap
                       SHUFFLE [RS_13]
                         PartitionCols:_col0, _col1
-                        Group By Operator [GBY_12] (rows=2 width=219)
+                        Group By Operator [GBY_12] (rows=4 width=219)
                           Output:["_col0","_col1"],keys:p_name, p_mfgr
-                          Select Operator [SEL_11] (rows=4 width=223)
+                          Select Operator [SEL_11] (rows=8 width=223)
                             Output:["p_name","p_mfgr"]
-                            Filter Operator [FIL_30] (rows=4 width=223)
-                              predicate:((p_mfgr = p_mfgr) and (p_size < 10))
+                            Filter Operator [FIL_30] (rows=8 width=223)
+                              predicate:((p_size < 10) and p_mfgr is not null)
                                Please refer to the previous TableScan [TS_0]
 
 PREHOOK: query: explain select p_name, p_size 
@@ -2465,7 +2469,7 @@ Stage-0
                               Select Operator [SEL_12] (rows=1 width=114)
                                 Output:["_col0","_col1"]
                                 Filter Operator [FIL_40] (rows=1 width=114)
-                                  predicate:(((_col2 - _col1) > 600.0) and (_col1 = _col1))
+                                  predicate:(((_col2 - _col1) > 600.0) and _col1 is not null)
                                   Group By Operator [GBY_10] (rows=5 width=114)
                                     Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"],keys:KEY._col0
                                   <-Map 1 [SIMPLE_EDGE] llap
@@ -2484,7 +2488,7 @@ Stage-0
                         Select Operator [SEL_24] (rows=1 width=110)
                           Output:["_col0","_col1"]
                           Filter Operator [FIL_41] (rows=1 width=114)
-                            predicate:(((_col2 - _col1) > 600.0) and (_col1 = _col1))
+                            predicate:(((_col2 - _col1) > 600.0) and _col1 is not null)
                             Group By Operator [GBY_22] (rows=5 width=114)
                               Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"],keys:KEY._col0
                             <-Map 1 [SIMPLE_EDGE] llap