You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2017/02/05 21:34:56 UTC
[7/7] hive git commit: HIVE-15458 : Fix semi-join conversion rule for subquery (Vineet Garg via Ashutosh Chauhan)

HIVE-15458 : Fix semi-join conversion rule for subquery (Vineet Garg via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan <ha...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f63dc2d4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f63dc2d4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f63dc2d4

Branch: refs/heads/master
Commit: f63dc2d4fbbf09a04af98c4a9ba047a355a2da0a
Parents: bc0aeec
Author: Vineet Garg <vg...@hortonworks.com>
Authored: Fri Feb 3 18:58:00 2017 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Sun Feb 5 13:33:55 2017 -0800

----------------------------------------------------------------------
 .../calcite/rules/HiveSemiJoinRule.java         |   25 +-
 .../calcite/translator/ASTConverter.java        |    3 +-
 .../hadoop/hive/ql/parse/CalcitePlanner.java    |   20 +-
 .../test/queries/clientpositive/leftsemijoin.q  |    8 +
 .../test/queries/clientpositive/multiMapJoin2.q |    8 +-
 .../clientpositive/constprog_partitioner.q.out  |   61 +-
 .../results/clientpositive/leftsemijoin.q.out   |  199 +++
 .../llap/dynamic_partition_pruning.q.out        |  114 +-
 .../clientpositive/llap/explainuser_1.q.out     |  444 +++---
 .../clientpositive/llap/leftsemijoin.q.out      |  206 +++
 .../results/clientpositive/llap/lineage3.q.out  |    2 +-
 .../clientpositive/llap/multiMapJoin2.q.out     | 1269 +++++++++---------
 .../clientpositive/llap/subquery_exists.q.out   |   90 +-
 .../clientpositive/llap/subquery_in.q.out       |  606 +++------
 .../clientpositive/llap/subquery_multi.q.out    |  444 +++---
 .../clientpositive/llap/subquery_notin.q.out    |   76 +-
 .../clientpositive/llap/subquery_scalar.q.out   |  164 +--
 .../clientpositive/llap/subquery_views.q.out    |   98 +-
 .../llap/vector_mapjoin_reduce.q.out            |  175 +--
 .../vectorized_dynamic_partition_pruning.q.out  |  121 +-
 .../test/results/clientpositive/masking_3.q.out |  275 +---
 .../test/results/clientpositive/masking_4.q.out |   39 +-
 .../results/clientpositive/perf/query70.q.out   |  178 ++-
 .../spark/constprog_partitioner.q.out           |   49 +-
 .../clientpositive/spark/leftsemijoin.q.out     |  197 +++
 .../clientpositive/spark/subquery_exists.q.out  |   90 +-
 .../clientpositive/spark/subquery_in.q.out      |  599 +++------
 .../spark/vector_mapjoin_reduce.q.out           |  131 +-
 .../subq_where_serialization.q.out              |   98 +-
 .../clientpositive/subquery_exists.q.out        |  114 +-
 .../clientpositive/subquery_exists_having.q.out |  141 +-
 .../clientpositive/subquery_in_having.q.out     |  528 ++++----
 .../subquery_unqualcolumnrefs.q.out             |  122 +-
 .../clientpositive/vector_mapjoin_reduce.q.out  |  398 ++----
 34 files changed, 3291 insertions(+), 3801 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/f63dc2d4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java
index 14eb3a6..e400896 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java
@@ -20,6 +20,7 @@ import org.apache.calcite.plan.RelOptCluster;
 import org.apache.calcite.plan.RelOptRule;
 import org.apache.calcite.plan.RelOptRuleCall;
 import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.hep.HepRelVertex;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.core.Aggregate;
 import org.apache.calcite.rel.core.Join;
@@ -37,6 +38,7 @@ import org.slf4j.LoggerFactory;
 
 import com.google.common.collect.Lists;
 
+import java.util.ArrayList;
 import java.util.List;
 
 /**
@@ -84,6 +86,11 @@ public class HiveSemiJoinRule extends RelOptRule {
       // By the way, neither a super-set nor a sub-set would work.
       return;
     }
+    if(join.getJoinType() == JoinRelType.LEFT) {
+      // For a LEFT join we only need the rows from the left input, so the right side can be dropped
+      call.transformTo(call.builder().push(left).project(project.getProjects(), project.getRowType().getFieldNames()).build());
+      return;
+    }
     if (join.getJoinType() != JoinRelType.INNER) {
       return;
     }
@@ -102,7 +109,23 @@ public class HiveSemiJoinRule extends RelOptRule {
     final RexNode newCondition =
         RelOptUtil.createEquiJoinCondition(left, joinInfo.leftKeys, newRight,
             newRightKeys, rexBuilder);
-    RelNode semi = call.builder().push(left).push(aggregate.getInput()).semiJoin(newCondition).build();
+
+    RelNode semi = null;
+    //HIVE-15458: we need to add a Project on top of Join since SemiJoin with Join as its right input
+    // is not expected further down the pipeline. See the JIRA for more details
+    if(aggregate.getInput() instanceof HepRelVertex
+          && ((HepRelVertex)aggregate.getInput()).getCurrentRel() instanceof  Join) {
+        Join rightJoin = (Join)(((HepRelVertex)aggregate.getInput()).getCurrentRel());
+        List<RexNode> projects = new ArrayList<>();
+        for(int i=0; i<rightJoin.getRowType().getFieldCount(); i++){
+          projects.add(rexBuilder.makeInputRef(rightJoin, i));
+        }
+       RelNode topProject =  call.builder().push(rightJoin).project(projects, rightJoin.getRowType().getFieldNames(), true).build();
+      semi = call.builder().push(left).push(topProject).semiJoin(newCondition).build();
+    }
+    else {
+      semi = call.builder().push(left).push(aggregate.getInput()).semiJoin(newCondition).build();
+    }
     call.transformTo(call.builder().push(semi).project(project.getProjects(), project.getRowType().getFieldNames()).build());
   }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/f63dc2d4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
index e78c8e9..27990a2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
@@ -360,7 +360,8 @@ public class ASTConverter {
       s = new Schema(left.schema, right.schema);
       ASTNode cond = join.getCondition().accept(new RexVisitor(s));
       boolean semiJoin = join instanceof SemiJoin;
-      if (join.getRight() instanceof Join) {
+      if (join.getRight() instanceof Join && !semiJoin) {
+          // should not be done for semijoin since it will change the semantics
         // Invert join inputs; this is done because otherwise the SemanticAnalyzer
         // methods to merge joins will not kick in
         JoinRelType type;

http://git-wip-us.apache.org/repos/asf/hive/blob/f63dc2d4/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index a268d80..96ff5df 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -136,8 +136,17 @@ import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
-import org.apache.hadoop.hive.ql.optimizer.calcite.*;
+import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
+import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException;
 import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvider;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HivePlannerContext;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRexExecutorImpl;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl;
+import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
 import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveAlgorithmsConf;
 import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveVolcanoPlanner;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
@@ -337,10 +346,6 @@ public class CalcitePlanner extends SemanticAnalyzer {
         boolean reAnalyzeAST = false;
         final boolean materializedView = getQB().isMaterializedView();
 
-        // currently semi-join optimization doesn't work with subqueries
-        // so this will be turned off for if we find subqueries and will later be
-        // restored to its original state
-        boolean originalSemiOptVal = this.conf.getBoolVar(ConfVars.SEMIJOIN_CONVERSION);
         try {
           if (this.conf.getBoolVar(HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP)) {
             sinkOp = getOptimizedHiveOPDag();
@@ -446,8 +451,6 @@ public class CalcitePlanner extends SemanticAnalyzer {
             super.genResolvedParseTree(ast, new PlannerContext());
             skipCalcitePlan = true;
           }
-          // restore semi-join opt flag
-          this.conf.setBoolVar(ConfVars.SEMIJOIN_CONVERSION, originalSemiOptVal);
         }
       } else {
         this.ctx.setCboInfo("Plan not optimized by CBO.");
@@ -2416,9 +2419,6 @@ public class CalcitePlanner extends SemanticAnalyzer {
                 .get(srcRel));
         relToHiveRR.put(filterRel, relToHiveRR.get(srcRel));
         this.subqueryId++;
-
-        // semi-join opt doesn't work with subqueries
-        conf.setBoolVar(ConfVars.SEMIJOIN_CONVERSION, false);
         return filterRel;
       } else {
         return genFilterRelNode(searchCond, srcRel, outerNameToPosMap, outerRR, forHavingClause);

http://git-wip-us.apache.org/repos/asf/hive/blob/f63dc2d4/ql/src/test/queries/clientpositive/leftsemijoin.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/leftsemijoin.q b/ql/src/test/queries/clientpositive/leftsemijoin.q
index 71c3a0d..8974219 100644
--- a/ql/src/test/queries/clientpositive/leftsemijoin.q
+++ b/ql/src/test/queries/clientpositive/leftsemijoin.q
@@ -24,3 +24,11 @@ SELECT name,id FROM sales LEFT SEMI JOIN things ON (sales.id = things.id);
 
 drop table sales;
 drop table things;
+
+-- HIVE-15458
+explain select part.p_type from part join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name;
+select part.p_type from part join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name;
+
+-- Semi join optimization should take out the right side
+explain select part.p_type from part left join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name;
+select part.p_type from part left join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name;

http://git-wip-us.apache.org/repos/asf/hive/blob/f63dc2d4/ql/src/test/queries/clientpositive/multiMapJoin2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/multiMapJoin2.q b/ql/src/test/queries/clientpositive/multiMapJoin2.q
index c66dc66..38ab1a7 100644
--- a/ql/src/test/queries/clientpositive/multiMapJoin2.q
+++ b/ql/src/test/queries/clientpositive/multiMapJoin2.q
@@ -195,7 +195,7 @@ set hive.optimize.correlation=false;
 -- HIVE-5891 Alias conflict when merging multiple mapjoin tasks into their common
 -- child mapred task
 EXPLAIN   
-SELECT * FROM (
+SELECT x.key FROM (
   SELECT c.key FROM
     (SELECT a.key FROM src a JOIN src b ON a.key=b.key GROUP BY a.key) tmp
     JOIN src c ON tmp.key=c.key
@@ -203,9 +203,9 @@ SELECT * FROM (
   SELECT c.key FROM
     (SELECT a.key FROM src a JOIN src b ON a.key=b.key GROUP BY a.key) tmp
     JOIN src c ON tmp.key=c.key
-) x;
+) x order by x.key;
 
-SELECT * FROM (
+SELECT x.key FROM (
   SELECT c.key FROM
     (SELECT a.key FROM src a JOIN src b ON a.key=b.key GROUP BY a.key) tmp
     JOIN src c ON tmp.key=c.key
@@ -213,5 +213,5 @@ SELECT * FROM (
   SELECT c.key FROM
     (SELECT a.key FROM src a JOIN src b ON a.key=b.key GROUP BY a.key) tmp
     JOIN src c ON tmp.key=c.key
-) x;
+) x order by x.key;
 

http://git-wip-us.apache.org/repos/asf/hive/blob/f63dc2d4/ql/src/test/results/clientpositive/constprog_partitioner.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/constprog_partitioner.q.out b/ql/src/test/results/clientpositive/constprog_partitioner.q.out
index d4ccb8c..2a44269 100644
--- a/ql/src/test/results/clientpositive/constprog_partitioner.q.out
+++ b/ql/src/test/results/clientpositive/constprog_partitioner.q.out
@@ -80,14 +80,13 @@ WHERE li.l_linenumber = 1 AND
  li.l_orderkey IN (SELECT l_orderkey FROM lineitem WHERE l_shipmode = 'AIR' AND l_linenumber = li.l_linenumber)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-4 is a root stage
-  Stage-2 depends on stages: Stage-4
-  Stage-3 depends on stages: Stage-2
-  Stage-1 depends on stages: Stage-3
+  Stage-3 is a root stage
+  Stage-2 depends on stages: Stage-3
+  Stage-1 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-4
+  Stage: Stage-3
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -154,39 +153,21 @@ STAGE PLANS:
             1 _col0 (type: int)
           outputColumnNames: _col0, _col3
           Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
-          Group By Operator
-            keys: _col0 (type: int), _col3 (type: int)
-            mode: hash
+          Select Operator
+            expressions: _col0 (type: int), _col3 (type: int)
             outputColumnNames: _col0, _col1
             Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-3
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: int), _col1 (type: int)
-              sort order: ++
-              Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+            Group By Operator
+              keys: _col0 (type: int), _col1 (type: int)
+              mode: hash
+              outputColumnNames: _col0, _col1
               Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
-      Reduce Operator Tree:
-        Group By Operator
-          keys: KEY._col0 (type: int), KEY._col1 (type: int)
-          mode: mergepartial
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 27 Data size: 3239 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-1
     Map Reduce
@@ -212,23 +193,23 @@ STAGE PLANS:
               key expressions: _col0 (type: int), _col1 (type: int)
               sort order: ++
               Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
-              Statistics: Num rows: 27 Data size: 3239 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Join Operator
           condition map:
-               Inner Join 0 to 1
+               Left Semi Join 0 to 1
           keys:
             0 _col0 (type: int), 1 (type: int)
             1 _col0 (type: int), _col1 (type: int)
           outputColumnNames: _col1, _col2
-          Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: _col1 (type: int), _col2 (type: int)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
               table:
                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/f63dc2d4/ql/src/test/results/clientpositive/leftsemijoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/leftsemijoin.q.out b/ql/src/test/results/clientpositive/leftsemijoin.q.out
index a11bbc4..28229cd 100644
--- a/ql/src/test/results/clientpositive/leftsemijoin.q.out
+++ b/ql/src/test/results/clientpositive/leftsemijoin.q.out
@@ -108,3 +108,202 @@ POSTHOOK: query: drop table things
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@things
 POSTHOOK: Output: default@things
+Warning: Shuffle Join JOIN[10][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: explain select part.p_type from part join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select part.p_type from part join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-1 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: p1
+            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: p_name is not null (type: boolean)
+              Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: p_name (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: string)
+          TableScan
+            alias: p2
+            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+              Reduce Output Operator
+                sort order: 
+                Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 
+            1 
+          outputColumnNames: _col0
+          Statistics: Num rows: 676 Data size: 85202 Basic stats: COMPLETE Column stats: NONE
+          Group By Operator
+            keys: _col0 (type: string)
+            mode: hash
+            outputColumnNames: _col0
+            Statistics: Num rows: 676 Data size: 85202 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: part
+            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: p_name is not null (type: boolean)
+              Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: p_name (type: string), p_type (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: string)
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 676 Data size: 85202 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Left Semi Join 0 to 1
+          keys:
+            0 _col0 (type: string)
+            1 _col0 (type: string)
+          outputColumnNames: _col1
+          Statistics: Num rows: 743 Data size: 93722 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: string)
+            outputColumnNames: _col0
+            Statistics: Num rows: 743 Data size: 93722 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 743 Data size: 93722 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+Warning: Shuffle Join JOIN[10][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: select part.p_type from part join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select part.p_type from part join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+ECONOMY BRUSHED COPPER
+ECONOMY BURNISHED STEEL
+ECONOMY PLATED COPPER
+ECONOMY POLISHED STEEL
+LARGE BRUSHED BRASS
+LARGE BRUSHED STEEL
+LARGE BURNISHED STEEL
+MEDIUM ANODIZED COPPER
+MEDIUM BURNISHED BRASS
+MEDIUM BURNISHED COPPER
+MEDIUM BURNISHED TIN
+MEDIUM BURNISHED TIN
+PROMO ANODIZED TIN
+PROMO BURNISHED NICKEL
+PROMO PLATED TIN
+PROMO PLATED TIN
+PROMO POLISHED STEEL
+SMALL BRUSHED BRASS
+SMALL PLATED BRASS
+SMALL PLATED STEEL
+SMALL POLISHED NICKEL
+STANDARD ANODIZED STEEL
+STANDARD ANODIZED TIN
+STANDARD BURNISHED TIN
+STANDARD PLATED TIN
+STANDARD POLISHED STEEL
+PREHOOK: query: explain select part.p_type from part left join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select part.p_type from part left join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: part
+          Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: p_type (type: string)
+            outputColumnNames: _col0
+            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+            ListSink
+
+PREHOOK: query: select part.p_type from part left join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select part.p_type from part left join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+ECONOMY BRUSHED COPPER
+ECONOMY BURNISHED STEEL
+ECONOMY PLATED COPPER
+ECONOMY POLISHED STEEL
+LARGE BRUSHED BRASS
+LARGE BRUSHED STEEL
+LARGE BURNISHED STEEL
+MEDIUM ANODIZED COPPER
+MEDIUM BURNISHED BRASS
+MEDIUM BURNISHED COPPER
+MEDIUM BURNISHED TIN
+MEDIUM BURNISHED TIN
+PROMO ANODIZED TIN
+PROMO BURNISHED NICKEL
+PROMO PLATED TIN
+PROMO PLATED TIN
+PROMO POLISHED STEEL
+SMALL BRUSHED BRASS
+SMALL PLATED BRASS
+SMALL PLATED STEEL
+SMALL POLISHED NICKEL
+STANDARD ANODIZED STEEL
+STANDARD ANODIZED TIN
+STANDARD BURNISHED TIN
+STANDARD PLATED TIN
+STANDARD POLISHED STEEL

http://git-wip-us.apache.org/repos/asf/hive/blob/f63dc2d4/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
index 1b6bb1f..c63daba 100644
--- a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
@@ -3730,10 +3730,9 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 10 <- Union 9 (SIMPLE_EDGE)
-        Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE), Union 9 (CONTAINS)
+        Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE), Union 9 (CONTAINS)
         Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS)
-        Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Union 3 (SIMPLE_EDGE)
+        Reducer 4 <- Union 3 (SIMPLE_EDGE), Union 9 (SIMPLE_EDGE)
         Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS)
         Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE), Union 9 (CONTAINS)
 #### A masked pattern was here ####
@@ -3756,7 +3755,7 @@ STAGE PLANS:
                       Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
-        Map 11 
+        Map 10 
             Map Operator Tree:
                 TableScan
                   alias: srcpart
@@ -3814,50 +3813,7 @@ STAGE PLANS:
                         value expressions: _col0 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
-        Reducer 10 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Group By Operator
-                keys: KEY._col0 (type: string)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
-                Select Operator
-                  expressions: _col0 (type: string)
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
-                  Group By Operator
-                    keys: _col0 (type: string)
-                    mode: hash
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
-                    Dynamic Partitioning Event Operator
-                      Target column: ds (string)
-                      Target Input: srcpart
-                      Partition key expr: ds
-                      Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
-                      Target Vertex: Map 1
-                Select Operator
-                  expressions: _col0 (type: string)
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
-                  Group By Operator
-                    keys: _col0 (type: string)
-                    mode: hash
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
-                    Dynamic Partitioning Event Operator
-                      Target column: ds (string)
-                      Target Input: srcpart
-                      Partition key expr: ds
-                      Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
-                      Target Vertex: Map 5
-        Reducer 12 
+        Reducer 11 
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
@@ -3878,6 +3834,36 @@ STAGE PLANS:
                       sort order: +
                       Map-reduce partition columns: _col0 (type: string)
                       Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: _col0 (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        keys: _col0 (type: string)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+                        Dynamic Partitioning Event Operator
+                          Target column: ds (string)
+                          Target Input: srcpart
+                          Partition key expr: ds
+                          Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+                          Target Vertex: Map 1
+                    Select Operator
+                      expressions: _col0 (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        keys: _col0 (type: string)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+                        Dynamic Partitioning Event Operator
+                          Target column: ds (string)
+                          Target Input: srcpart
+                          Partition key expr: ds
+                          Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+                          Target Vertex: Map 5
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -3896,7 +3882,7 @@ STAGE PLANS:
             Reduce Operator Tree:
               Merge Join Operator
                 condition map:
-                     Inner Join 0 to 1
+                     Left Semi Join 0 to 1
                 keys:
                   0 _col0 (type: string)
                   1 _col0 (type: string)
@@ -3943,6 +3929,36 @@ STAGE PLANS:
                       sort order: +
                       Map-reduce partition columns: _col0 (type: string)
                       Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: _col0 (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        keys: _col0 (type: string)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+                        Dynamic Partitioning Event Operator
+                          Target column: ds (string)
+                          Target Input: srcpart
+                          Partition key expr: ds
+                          Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+                          Target Vertex: Map 1
+                    Select Operator
+                      expressions: _col0 (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        keys: _col0 (type: string)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+                        Dynamic Partitioning Event Operator
+                          Target column: ds (string)
+                          Target Input: srcpart
+                          Partition key expr: ds
+                          Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+                          Target Vertex: Map 5
         Union 3 
             Vertex: Union 3
         Union 9 

http://git-wip-us.apache.org/repos/asf/hive/blob/f63dc2d4/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
index c3f8071..621f337 100644
--- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
@@ -1991,59 +1991,55 @@ POSTHOOK: type: QUERY
 Plan optimized by CBO.
 
 Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
-Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
-Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
-Reducer 7 <- Map 6 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
+Reducer 6 <- Map 5 (SIMPLE_EDGE)
 
 Stage-0
   Fetch Operator
     limit:-1
     Stage-1
       Reducer 2 llap
-      File Output Operator [FS_23]
-        Merge Join Operator [MERGEJOIN_33] (rows=1 width=178)
-          Conds:RS_19._col0, _col1=RS_20._col0, _col1(Inner),Output:["_col0","_col1"]
+      File Output Operator [FS_21]
+        Merge Join Operator [MERGEJOIN_31] (rows=1 width=178)
+          Conds:RS_17._col0, _col1=RS_18._col0, _col1(Left Semi),Output:["_col0","_col1"]
         <-Map 1 [SIMPLE_EDGE] llap
-          SHUFFLE [RS_19]
+          SHUFFLE [RS_17]
             PartitionCols:_col0, _col1
             Select Operator [SEL_1] (rows=500 width=178)
               Output:["_col0","_col1"]
               TableScan [TS_0] (rows=500 width=178)
                 default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"],properties:{"insideView":"TRUE"}
-        <-Reducer 5 [SIMPLE_EDGE] llap
-          SHUFFLE [RS_20]
+        <-Reducer 4 [SIMPLE_EDGE] llap
+          SHUFFLE [RS_18]
             PartitionCols:_col0, _col1
-            Group By Operator [GBY_17] (rows=1 width=178)
-              Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
-            <-Reducer 4 [SIMPLE_EDGE] llap
-              SHUFFLE [RS_16]
-                PartitionCols:_col0, _col1
-                Group By Operator [GBY_15] (rows=1 width=178)
-                  Output:["_col0","_col1"],keys:_col2, _col3
-                  Merge Join Operator [MERGEJOIN_32] (rows=1 width=178)
-                    Conds:RS_11._col0, _col1=RS_12._col0, _col1(Inner),Output:["_col2","_col3"]
-                  <-Map 3 [SIMPLE_EDGE] llap
-                    SHUFFLE [RS_11]
-                      PartitionCols:_col0, _col1
-                      Select Operator [SEL_4] (rows=166 width=178)
-                        Output:["_col0","_col1"]
-                        Filter Operator [FIL_30] (rows=166 width=178)
-                          predicate:(value > 'val_9')
-                          TableScan [TS_2] (rows=500 width=178)
-                            default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
-                  <-Reducer 7 [SIMPLE_EDGE] llap
-                    SHUFFLE [RS_12]
-                      PartitionCols:_col0, _col1
-                      Group By Operator [GBY_9] (rows=250 width=178)
-                        Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
-                      <-Map 6 [SIMPLE_EDGE] llap
-                        SHUFFLE [RS_8]
-                          PartitionCols:_col0, _col1
-                          Group By Operator [GBY_7] (rows=250 width=178)
-                            Output:["_col0","_col1"],keys:key, value
-                            TableScan [TS_5] (rows=500 width=178)
-                              default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"],properties:{"insideView":"TRUE"}
+            Group By Operator [GBY_16] (rows=1 width=178)
+              Output:["_col0","_col1"],keys:_col0, _col1
+              Select Operator [SEL_14] (rows=1 width=178)
+                Output:["_col0","_col1"]
+                Merge Join Operator [MERGEJOIN_30] (rows=1 width=178)
+                  Conds:RS_11._col0, _col1=RS_12._col0, _col1(Inner),Output:["_col2","_col3"]
+                <-Map 3 [SIMPLE_EDGE] llap
+                  SHUFFLE [RS_11]
+                    PartitionCols:_col0, _col1
+                    Select Operator [SEL_4] (rows=166 width=178)
+                      Output:["_col0","_col1"]
+                      Filter Operator [FIL_28] (rows=166 width=178)
+                        predicate:(value > 'val_9')
+                        TableScan [TS_2] (rows=500 width=178)
+                          default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+                <-Reducer 6 [SIMPLE_EDGE] llap
+                  SHUFFLE [RS_12]
+                    PartitionCols:_col0, _col1
+                    Group By Operator [GBY_9] (rows=250 width=178)
+                      Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
+                    <-Map 5 [SIMPLE_EDGE] llap
+                      SHUFFLE [RS_8]
+                        PartitionCols:_col0, _col1
+                        Group By Operator [GBY_7] (rows=250 width=178)
+                          Output:["_col0","_col1"],keys:key, value
+                          TableScan [TS_5] (rows=500 width=178)
+                            default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"],properties:{"insideView":"TRUE"}
 
 PREHOOK: query: explain select * 
 from (select * 
@@ -2066,59 +2062,55 @@ POSTHOOK: type: QUERY
 Plan optimized by CBO.
 
 Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
-Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
-Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
-Reducer 7 <- Map 6 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
+Reducer 6 <- Map 5 (SIMPLE_EDGE)
 
 Stage-0
   Fetch Operator
     limit:-1
     Stage-1
       Reducer 2 llap
-      File Output Operator [FS_23]
-        Merge Join Operator [MERGEJOIN_33] (rows=1 width=178)
-          Conds:RS_19._col0, _col1=RS_20._col0, _col1(Inner),Output:["_col0","_col1"]
+      File Output Operator [FS_21]
+        Merge Join Operator [MERGEJOIN_31] (rows=1 width=178)
+          Conds:RS_17._col0, _col1=RS_18._col0, _col1(Left Semi),Output:["_col0","_col1"]
         <-Map 1 [SIMPLE_EDGE] llap
-          SHUFFLE [RS_19]
+          SHUFFLE [RS_17]
             PartitionCols:_col0, _col1
             Select Operator [SEL_1] (rows=500 width=178)
               Output:["_col0","_col1"]
               TableScan [TS_0] (rows=500 width=178)
                 default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
-        <-Reducer 5 [SIMPLE_EDGE] llap
-          SHUFFLE [RS_20]
+        <-Reducer 4 [SIMPLE_EDGE] llap
+          SHUFFLE [RS_18]
             PartitionCols:_col0, _col1
-            Group By Operator [GBY_17] (rows=1 width=178)
-              Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
-            <-Reducer 4 [SIMPLE_EDGE] llap
-              SHUFFLE [RS_16]
-                PartitionCols:_col0, _col1
-                Group By Operator [GBY_15] (rows=1 width=178)
-                  Output:["_col0","_col1"],keys:_col2, _col3
-                  Merge Join Operator [MERGEJOIN_32] (rows=1 width=178)
-                    Conds:RS_11._col0, _col1=RS_12._col0, _col1(Inner),Output:["_col2","_col3"]
-                  <-Map 3 [SIMPLE_EDGE] llap
-                    SHUFFLE [RS_11]
-                      PartitionCols:_col0, _col1
-                      Select Operator [SEL_4] (rows=166 width=178)
-                        Output:["_col0","_col1"]
-                        Filter Operator [FIL_30] (rows=166 width=178)
-                          predicate:(value > 'val_9')
-                          TableScan [TS_2] (rows=500 width=178)
-                            default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
-                  <-Reducer 7 [SIMPLE_EDGE] llap
-                    SHUFFLE [RS_12]
-                      PartitionCols:_col0, _col1
-                      Group By Operator [GBY_9] (rows=250 width=178)
-                        Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
-                      <-Map 6 [SIMPLE_EDGE] llap
-                        SHUFFLE [RS_8]
-                          PartitionCols:_col0, _col1
-                          Group By Operator [GBY_7] (rows=250 width=178)
-                            Output:["_col0","_col1"],keys:key, value
-                            TableScan [TS_5] (rows=500 width=178)
-                              default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+            Group By Operator [GBY_16] (rows=1 width=178)
+              Output:["_col0","_col1"],keys:_col0, _col1
+              Select Operator [SEL_14] (rows=1 width=178)
+                Output:["_col0","_col1"]
+                Merge Join Operator [MERGEJOIN_30] (rows=1 width=178)
+                  Conds:RS_11._col0, _col1=RS_12._col0, _col1(Inner),Output:["_col2","_col3"]
+                <-Map 3 [SIMPLE_EDGE] llap
+                  SHUFFLE [RS_11]
+                    PartitionCols:_col0, _col1
+                    Select Operator [SEL_4] (rows=166 width=178)
+                      Output:["_col0","_col1"]
+                      Filter Operator [FIL_28] (rows=166 width=178)
+                        predicate:(value > 'val_9')
+                        TableScan [TS_2] (rows=500 width=178)
+                          default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+                <-Reducer 6 [SIMPLE_EDGE] llap
+                  SHUFFLE [RS_12]
+                    PartitionCols:_col0, _col1
+                    Group By Operator [GBY_9] (rows=250 width=178)
+                      Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
+                    <-Map 5 [SIMPLE_EDGE] llap
+                      SHUFFLE [RS_8]
+                        PartitionCols:_col0, _col1
+                        Group By Operator [GBY_7] (rows=250 width=178)
+                          Output:["_col0","_col1"],keys:key, value
+                          TableScan [TS_5] (rows=500 width=178)
+                            default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
 
 PREHOOK: query: explain select * 
 from src_cbo 
@@ -2131,40 +2123,36 @@ POSTHOOK: type: QUERY
 Plan optimized by CBO.
 
 Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
-Reducer 4 <- Map 3 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
 
 Stage-0
   Fetch Operator
     limit:-1
     Stage-1
       Reducer 2 llap
-      File Output Operator [FS_14]
-        Merge Join Operator [MERGEJOIN_19] (rows=166 width=178)
-          Conds:RS_10._col0=RS_11._col0(Inner),Output:["_col0","_col1"]
+      File Output Operator [FS_12]
+        Merge Join Operator [MERGEJOIN_17] (rows=166 width=178)
+          Conds:RS_8._col0=RS_9._col0(Left Semi),Output:["_col0","_col1"]
         <-Map 1 [SIMPLE_EDGE] llap
-          SHUFFLE [RS_10]
+          SHUFFLE [RS_8]
             PartitionCols:_col0
             Select Operator [SEL_2] (rows=166 width=178)
               Output:["_col0","_col1"]
-              Filter Operator [FIL_17] (rows=166 width=178)
+              Filter Operator [FIL_15] (rows=166 width=178)
                 predicate:(key > '9')
                 TableScan [TS_0] (rows=500 width=178)
                   default@src_cbo,src_cbo,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
-        <-Reducer 4 [SIMPLE_EDGE] llap
-          SHUFFLE [RS_11]
+        <-Map 3 [SIMPLE_EDGE] llap
+          SHUFFLE [RS_9]
             PartitionCols:_col0
-            Group By Operator [GBY_8] (rows=69 width=87)
-              Output:["_col0"],keys:KEY._col0
-            <-Map 3 [SIMPLE_EDGE] llap
-              SHUFFLE [RS_7]
-                PartitionCols:_col0
-                Group By Operator [GBY_6] (rows=69 width=87)
-                  Output:["_col0"],keys:key
-                  Filter Operator [FIL_18] (rows=166 width=87)
-                    predicate:(key > '9')
-                    TableScan [TS_3] (rows=500 width=87)
-                      default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
+            Group By Operator [GBY_7] (rows=69 width=87)
+              Output:["_col0"],keys:_col0
+              Select Operator [SEL_5] (rows=166 width=87)
+                Output:["_col0"]
+                Filter Operator [FIL_16] (rows=166 width=87)
+                  predicate:(key > '9')
+                  TableScan [TS_3] (rows=500 width=87)
+                    default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
 
 PREHOOK: query: explain select p.p_partkey, li.l_suppkey 
 from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey 
@@ -2179,41 +2167,40 @@ POSTHOOK: type: QUERY
 Plan optimized by CBO.
 
 Vertex dependency in root stage
-Reducer 10 <- Map 9 (SIMPLE_EDGE)
-Reducer 11 <- Map 13 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE)
-Reducer 12 <- Reducer 11 (SIMPLE_EDGE)
+Reducer 10 <- Map 12 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
+Reducer 11 <- Reducer 10 (SIMPLE_EDGE)
 Reducer 2 <- Map 1 (SIMPLE_EDGE)
 Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
-Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
-Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE)
-Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
+Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE)
+Reducer 9 <- Map 8 (SIMPLE_EDGE)
 
 Stage-0
   Fetch Operator
     limit:-1
     Stage-1
       Reducer 4 llap
-      File Output Operator [FS_46]
-        Select Operator [SEL_45] (rows=5 width=8)
+      File Output Operator [FS_44]
+        Select Operator [SEL_43] (rows=5 width=8)
           Output:["_col0","_col1"]
-          Merge Join Operator [MERGEJOIN_67] (rows=5 width=8)
-            Conds:RS_42._col1, _col4=RS_43._col0, _col1(Inner),Output:["_col0","_col3"]
+          Merge Join Operator [MERGEJOIN_65] (rows=5 width=8)
+            Conds:RS_40._col1, _col4=RS_41._col0, _col1(Left Semi),Output:["_col0","_col3"]
           <-Reducer 3 [SIMPLE_EDGE] llap
-            SHUFFLE [RS_42]
+            SHUFFLE [RS_40]
               PartitionCols:_col1, _col4
-              Merge Join Operator [MERGEJOIN_64] (rows=5 width=16)
-                Conds:RS_39._col0=RS_40._col1(Inner),Output:["_col0","_col1","_col3","_col4"]
+              Merge Join Operator [MERGEJOIN_62] (rows=5 width=16)
+                Conds:RS_35._col0=RS_36._col1(Inner),Output:["_col0","_col1","_col3","_col4"]
               <-Map 5 [SIMPLE_EDGE] llap
-                SHUFFLE [RS_40]
+                SHUFFLE [RS_36]
                   PartitionCols:_col1
                   Select Operator [SEL_9] (rows=17 width=16)
                     Output:["_col0","_col1","_col2","_col3"]
-                    Filter Operator [FIL_60] (rows=17 width=16)
+                    Filter Operator [FIL_58] (rows=17 width=16)
                       predicate:((l_linenumber = 1) and l_partkey is not null)
                       TableScan [TS_7] (rows=100 width=16)
                         default@lineitem,li,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey","l_partkey","l_suppkey","l_linenumber"]
               <-Reducer 2 [SIMPLE_EDGE] llap
-                SHUFFLE [RS_39]
+                SHUFFLE [RS_35]
                   PartitionCols:_col0
                   Group By Operator [GBY_5] (rows=50 width=4)
                     Output:["_col0"],keys:KEY._col0
@@ -2222,66 +2209,63 @@ Stage-0
                       PartitionCols:_col0
                       Group By Operator [GBY_3] (rows=50 width=4)
                         Output:["_col0"],keys:l_partkey
-                        Filter Operator [FIL_59] (rows=100 width=4)
+                        Filter Operator [FIL_57] (rows=100 width=4)
                           predicate:l_partkey is not null
                           TableScan [TS_0] (rows=100 width=4)
                             default@lineitem,lineitem,Tbl:COMPLETE,Col:COMPLETE,Output:["l_partkey"]
-          <-Reducer 8 [SIMPLE_EDGE] llap
-            SHUFFLE [RS_43]
+          <-Reducer 7 [SIMPLE_EDGE] llap
+            SHUFFLE [RS_41]
               PartitionCols:_col0, _col1
-              Group By Operator [GBY_37] (rows=4 width=8)
-                Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
-              <-Reducer 7 [SIMPLE_EDGE] llap
-                SHUFFLE [RS_36]
-                  PartitionCols:_col0, _col1
-                  Group By Operator [GBY_35] (rows=4 width=8)
-                    Output:["_col0","_col1"],keys:_col0, _col3
-                    Merge Join Operator [MERGEJOIN_66] (rows=14 width=8)
-                      Conds:RS_31._col1=RS_32._col0(Inner),Output:["_col0","_col3"]
-                    <-Map 6 [SIMPLE_EDGE] llap
-                      SHUFFLE [RS_31]
-                        PartitionCols:_col1
-                        Select Operator [SEL_12] (rows=14 width=95)
-                          Output:["_col0","_col1"]
-                          Filter Operator [FIL_61] (rows=14 width=96)
-                            predicate:(l_shipmode = 'AIR')
-                            TableScan [TS_10] (rows=100 width=96)
-                              default@lineitem,lineitem,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey","l_linenumber","l_shipmode"]
-                    <-Reducer 12 [SIMPLE_EDGE] llap
-                      SHUFFLE [RS_32]
-                        PartitionCols:_col0
-                        Group By Operator [GBY_29] (rows=3 width=4)
-                          Output:["_col0"],keys:KEY._col0
-                        <-Reducer 11 [SIMPLE_EDGE] llap
-                          SHUFFLE [RS_28]
-                            PartitionCols:_col0
-                            Group By Operator [GBY_27] (rows=3 width=4)
-                              Output:["_col0"],keys:_col2
-                              Merge Join Operator [MERGEJOIN_65] (rows=34 width=4)
-                                Conds:RS_23._col0=RS_24._col0(Inner),Output:["_col2"]
-                              <-Map 13 [SIMPLE_EDGE] llap
-                                SHUFFLE [RS_24]
-                                  PartitionCols:_col0
-                                  Select Operator [SEL_22] (rows=100 width=8)
-                                    Output:["_col0","_col1"]
-                                    Filter Operator [FIL_63] (rows=100 width=8)
-                                      predicate:l_partkey is not null
-                                      TableScan [TS_20] (rows=100 width=8)
-                                        default@lineitem,li,Tbl:COMPLETE,Col:COMPLETE,Output:["l_partkey","l_linenumber"]
-                              <-Reducer 10 [SIMPLE_EDGE] llap
-                                SHUFFLE [RS_23]
-                                  PartitionCols:_col0
-                                  Group By Operator [GBY_18] (rows=50 width=4)
-                                    Output:["_col0"],keys:KEY._col0
-                                  <-Map 9 [SIMPLE_EDGE] llap
-                                    SHUFFLE [RS_17]
-                                      PartitionCols:_col0
-                                      Group By Operator [GBY_16] (rows=50 width=4)
-                                        Output:["_col0"],keys:l_partkey
-                                        Filter Operator [FIL_62] (rows=100 width=4)
-                                          predicate:l_partkey is not null
-                                          TableScan [TS_13] (rows=100 width=4)
-                                            default@lineitem,lineitem,Tbl:COMPLETE,Col:COMPLETE,Output:["l_partkey"]
+              Group By Operator [GBY_39] (rows=4 width=8)
+                Output:["_col0","_col1"],keys:_col0, _col1
+                Select Operator [SEL_34] (rows=14 width=8)
+                  Output:["_col0","_col1"]
+                  Merge Join Operator [MERGEJOIN_64] (rows=14 width=8)
+                    Conds:RS_31._col1=RS_32._col0(Inner),Output:["_col0","_col3"]
+                  <-Map 6 [SIMPLE_EDGE] llap
+                    SHUFFLE [RS_31]
+                      PartitionCols:_col1
+                      Select Operator [SEL_12] (rows=14 width=95)
+                        Output:["_col0","_col1"]
+                        Filter Operator [FIL_59] (rows=14 width=96)
+                          predicate:(l_shipmode = 'AIR')
+                          TableScan [TS_10] (rows=100 width=96)
+                            default@lineitem,lineitem,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey","l_linenumber","l_shipmode"]
+                  <-Reducer 11 [SIMPLE_EDGE] llap
+                    SHUFFLE [RS_32]
+                      PartitionCols:_col0
+                      Group By Operator [GBY_29] (rows=3 width=4)
+                        Output:["_col0"],keys:KEY._col0
+                      <-Reducer 10 [SIMPLE_EDGE] llap
+                        SHUFFLE [RS_28]
+                          PartitionCols:_col0
+                          Group By Operator [GBY_27] (rows=3 width=4)
+                            Output:["_col0"],keys:_col2
+                            Merge Join Operator [MERGEJOIN_63] (rows=34 width=4)
+                              Conds:RS_23._col0=RS_24._col0(Inner),Output:["_col2"]
+                            <-Map 12 [SIMPLE_EDGE] llap
+                              SHUFFLE [RS_24]
+                                PartitionCols:_col0
+                                Select Operator [SEL_22] (rows=100 width=8)
+                                  Output:["_col0","_col1"]
+                                  Filter Operator [FIL_61] (rows=100 width=8)
+                                    predicate:l_partkey is not null
+                                    TableScan [TS_20] (rows=100 width=8)
+                                      default@lineitem,li,Tbl:COMPLETE,Col:COMPLETE,Output:["l_partkey","l_linenumber"]
+                            <-Reducer 9 [SIMPLE_EDGE] llap
+                              SHUFFLE [RS_23]
+                                PartitionCols:_col0
+                                Group By Operator [GBY_18] (rows=50 width=4)
+                                  Output:["_col0"],keys:KEY._col0
+                                <-Map 8 [SIMPLE_EDGE] llap
+                                  SHUFFLE [RS_17]
+                                    PartitionCols:_col0
+                                    Group By Operator [GBY_16] (rows=50 width=4)
+                                      Output:["_col0"],keys:l_partkey
+                                      Filter Operator [FIL_60] (rows=100 width=4)
+                                        predicate:l_partkey is not null
+                                        TableScan [TS_13] (rows=100 width=4)
+                                          default@lineitem,lineitem,Tbl:COMPLETE,Col:COMPLETE,Output:["l_partkey"]
 
 PREHOOK: query: explain select key, value, count(*) 
 from src_cbo b
@@ -2300,23 +2284,22 @@ Plan optimized by CBO.
 Vertex dependency in root stage
 Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
 Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
-Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
 Reducer 6 <- Map 5 (SIMPLE_EDGE)
 Reducer 8 <- Map 7 (SIMPLE_EDGE)
-Reducer 9 <- Reducer 8 (SIMPLE_EDGE)
 
 Stage-0
   Fetch Operator
     limit:-1
     Stage-1
       Reducer 4 llap
-      File Output Operator [FS_36]
-        Merge Join Operator [MERGEJOIN_49] (rows=34 width=186)
-          Conds:RS_32._col2=RS_33._col0(Inner),Output:["_col0","_col1","_col2"]
+      File Output Operator [FS_33]
+        Merge Join Operator [MERGEJOIN_46] (rows=34 width=186)
+          Conds:RS_29._col2=RS_30._col0(Left Semi),Output:["_col0","_col1","_col2"]
         <-Reducer 3 [SIMPLE_EDGE] llap
-          SHUFFLE [RS_32]
+          SHUFFLE [RS_29]
             PartitionCols:_col2
-            Filter Operator [FIL_42] (rows=83 width=186)
+            Filter Operator [FIL_39] (rows=83 width=186)
               predicate:_col2 is not null
               Group By Operator [GBY_16] (rows=83 width=186)
                 Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1
@@ -2325,14 +2308,14 @@ Stage-0
                   PartitionCols:_col0, _col1
                   Group By Operator [GBY_14] (rows=83 width=186)
                     Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col0, _col1
-                    Merge Join Operator [MERGEJOIN_48] (rows=166 width=178)
+                    Merge Join Operator [MERGEJOIN_45] (rows=166 width=178)
                       Conds:RS_10._col0=RS_11._col0(Inner),Output:["_col0","_col1"]
                     <-Map 1 [SIMPLE_EDGE] llap
                       SHUFFLE [RS_10]
                         PartitionCols:_col0
                         Select Operator [SEL_2] (rows=166 width=178)
                           Output:["_col0","_col1"]
-                          Filter Operator [FIL_43] (rows=166 width=178)
+                          Filter Operator [FIL_40] (rows=166 width=178)
                             predicate:(key > '8')
                             TableScan [TS_0] (rows=500 width=178)
                               default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
@@ -2346,35 +2329,32 @@ Stage-0
                             PartitionCols:_col0
                             Group By Operator [GBY_6] (rows=69 width=87)
                               Output:["_col0"],keys:key
-                              Filter Operator [FIL_44] (rows=166 width=87)
+                              Filter Operator [FIL_41] (rows=166 width=87)
                                 predicate:(key > '8')
                                 TableScan [TS_3] (rows=500 width=87)
                                   default@src_cbo,src_cbo,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
-        <-Reducer 9 [SIMPLE_EDGE] llap
-          SHUFFLE [RS_33]
+        <-Reducer 8 [SIMPLE_EDGE] llap
+          SHUFFLE [RS_30]
             PartitionCols:_col0
-            Group By Operator [GBY_30] (rows=34 width=8)
-              Output:["_col0"],keys:KEY._col0
-            <-Reducer 8 [SIMPLE_EDGE] llap
-              SHUFFLE [RS_29]
-                PartitionCols:_col0
-                Group By Operator [GBY_28] (rows=34 width=8)
-                  Output:["_col0"],keys:_col1
-                  Filter Operator [FIL_45] (rows=69 width=8)
-                    predicate:_col1 is not null
-                    Select Operator [SEL_47] (rows=69 width=8)
-                      Output:["_col1"]
-                      Group By Operator [GBY_24] (rows=69 width=95)
-                        Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0
-                      <-Map 7 [SIMPLE_EDGE] llap
-                        SHUFFLE [RS_23]
-                          PartitionCols:_col0
-                          Group By Operator [GBY_22] (rows=69 width=95)
-                            Output:["_col0","_col1"],aggregations:["count()"],keys:key
-                            Filter Operator [FIL_46] (rows=166 width=87)
-                              predicate:(key > '9')
-                              TableScan [TS_19] (rows=500 width=87)
-                                default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
+            Group By Operator [GBY_28] (rows=34 width=8)
+              Output:["_col0"],keys:_col0
+              Select Operator [SEL_26] (rows=69 width=8)
+                Output:["_col0"]
+                Filter Operator [FIL_42] (rows=69 width=8)
+                  predicate:_col1 is not null
+                  Select Operator [SEL_44] (rows=69 width=8)
+                    Output:["_col1"]
+                    Group By Operator [GBY_24] (rows=69 width=95)
+                      Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0
+                    <-Map 7 [SIMPLE_EDGE] llap
+                      SHUFFLE [RS_23]
+                        PartitionCols:_col0
+                        Group By Operator [GBY_22] (rows=69 width=95)
+                          Output:["_col0","_col1"],aggregations:["count()"],keys:key
+                          Filter Operator [FIL_43] (rows=166 width=87)
+                            predicate:(key > '9')
+                            TableScan [TS_19] (rows=500 width=87)
+                              default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
 
 PREHOOK: query: explain select p_mfgr, p_name, avg(p_size) 
 from part 
@@ -2392,20 +2372,19 @@ Plan optimized by CBO.
 
 Vertex dependency in root stage
 Reducer 2 <- Map 1 (SIMPLE_EDGE)
-Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
 Reducer 5 <- Map 4 (SIMPLE_EDGE)
-Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
 
 Stage-0
   Fetch Operator
     limit:-1
     Stage-1
       Reducer 3 llap
-      File Output Operator [FS_23]
-        Merge Join Operator [MERGEJOIN_28] (rows=6 width=227)
-          Conds:RS_19._col1=RS_20._col0(Inner),Output:["_col0","_col1","_col2"]
+      File Output Operator [FS_21]
+        Merge Join Operator [MERGEJOIN_26] (rows=6 width=227)
+          Conds:RS_17._col1=RS_18._col0(Left Semi),Output:["_col0","_col1","_col2"]
         <-Reducer 2 [SIMPLE_EDGE] llap
-          SHUFFLE [RS_19]
+          SHUFFLE [RS_17]
             PartitionCols:_col1
             Select Operator [SEL_6] (rows=13 width=227)
               Output:["_col0","_col1","_col2"]
@@ -2416,33 +2395,28 @@ Stage-0
                   PartitionCols:_col0, _col1
                   Group By Operator [GBY_3] (rows=13 width=295)
                     Output:["_col0","_col1","_col2"],aggregations:["avg(p_size)"],keys:p_name, p_mfgr
-                    Filter Operator [FIL_26] (rows=26 width=223)
+                    Filter Operator [FIL_24] (rows=26 width=223)
                       predicate:p_name is not null
                       TableScan [TS_0] (rows=26 width=223)
                         default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name","p_mfgr","p_size"]
-        <-Reducer 6 [SIMPLE_EDGE] llap
-          SHUFFLE [RS_20]
+        <-Reducer 5 [SIMPLE_EDGE] llap
+          SHUFFLE [RS_18]
             PartitionCols:_col0
-            Group By Operator [GBY_17] (rows=13 width=184)
-              Output:["_col0"],keys:KEY._col0
-            <-Reducer 5 [SIMPLE_EDGE] llap
-              SHUFFLE [RS_16]
-                PartitionCols:_col0
-                Group By Operator [GBY_15] (rows=13 width=184)
-                  Output:["_col0"],keys:_col0
-                  Select Operator [SEL_11] (rows=26 width=491)
-                    Output:["_col0"]
-                    Filter Operator [FIL_27] (rows=26 width=491)
-                      predicate:first_value_window_0 is not null
-                      PTF Operator [PTF_10] (rows=26 width=491)
-                        Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col5 ASC NULLS FIRST","partition by:":"_col2"}]
-                        Select Operator [SEL_9] (rows=26 width=491)
-                          Output:["_col1","_col2","_col5"]
-                        <-Map 4 [SIMPLE_EDGE] llap
-                          SHUFFLE [RS_8]
-                            PartitionCols:p_mfgr
-                            TableScan [TS_7] (rows=26 width=223)
-                              default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_name","p_size"]
+            Group By Operator [GBY_16] (rows=13 width=184)
+              Output:["_col0"],keys:_col0
+              Select Operator [SEL_11] (rows=26 width=184)
+                Output:["_col0"]
+                Filter Operator [FIL_25] (rows=26 width=491)
+                  predicate:first_value_window_0 is not null
+                  PTF Operator [PTF_10] (rows=26 width=491)
+                    Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col5 ASC NULLS FIRST","partition by:":"_col2"}]
+                    Select Operator [SEL_9] (rows=26 width=491)
+                      Output:["_col1","_col2","_col5"]
+                    <-Map 4 [SIMPLE_EDGE] llap
+                      SHUFFLE [RS_8]
+                        PartitionCols:p_mfgr
+                        TableScan [TS_7] (rows=26 width=223)
+                          default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_name","p_size"]
 
 PREHOOK: query: explain select * 
 from src_cbo 

http://git-wip-us.apache.org/repos/asf/hive/blob/f63dc2d4/ql/src/test/results/clientpositive/llap/leftsemijoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/leftsemijoin.q.out b/ql/src/test/results/clientpositive/llap/leftsemijoin.q.out
index a11bbc4..611d929 100644
--- a/ql/src/test/results/clientpositive/llap/leftsemijoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/leftsemijoin.q.out
@@ -108,3 +108,209 @@ POSTHOOK: query: drop table things
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@things
 POSTHOOK: Output: default@things
+Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product
+PREHOOK: query: explain select part.p_type from part join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select part.p_type from part join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+        Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE), Map 5 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: part
+                  Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: p_name is not null (type: boolean)
+                    Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: p_name (type: string), p_type (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: string)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: p1
+                  Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: p_name is not null (type: boolean)
+                    Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: p_name (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col0 (type: string)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: p2
+                  Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Left Semi Join 0 to 1
+                keys:
+                  0 _col0 (type: string)
+                  1 _col0 (type: string)
+                outputColumnNames: _col1
+                Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: string)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 
+                  1 
+                outputColumnNames: _col0
+                Statistics: Num rows: 676 Data size: 81796 Basic stats: COMPLETE Column stats: COMPLETE
+                Group By Operator
+                  keys: _col0 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 25 Data size: 3025 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 25 Data size: 3025 Basic stats: COMPLETE Column stats: COMPLETE
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product
+PREHOOK: query: select part.p_type from part join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select part.p_type from part join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+ECONOMY BRUSHED COPPER
+ECONOMY BURNISHED STEEL
+ECONOMY PLATED COPPER
+ECONOMY POLISHED STEEL
+LARGE BRUSHED BRASS
+LARGE BRUSHED STEEL
+LARGE BURNISHED STEEL
+MEDIUM ANODIZED COPPER
+MEDIUM BURNISHED BRASS
+MEDIUM BURNISHED COPPER
+MEDIUM BURNISHED TIN
+MEDIUM BURNISHED TIN
+PROMO ANODIZED TIN
+PROMO BURNISHED NICKEL
+PROMO PLATED TIN
+PROMO PLATED TIN
+PROMO POLISHED STEEL
+SMALL BRUSHED BRASS
+SMALL PLATED BRASS
+SMALL PLATED STEEL
+SMALL POLISHED NICKEL
+STANDARD ANODIZED STEEL
+STANDARD ANODIZED TIN
+STANDARD BURNISHED TIN
+STANDARD PLATED TIN
+STANDARD POLISHED STEEL
+PREHOOK: query: explain select part.p_type from part left join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select part.p_type from part left join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: part
+          Select Operator
+            expressions: p_type (type: string)
+            outputColumnNames: _col0
+            ListSink
+
+PREHOOK: query: select part.p_type from part left join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select part.p_type from part left join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+ECONOMY BRUSHED COPPER
+ECONOMY BURNISHED STEEL
+ECONOMY PLATED COPPER
+ECONOMY POLISHED STEEL
+LARGE BRUSHED BRASS
+LARGE BRUSHED STEEL
+LARGE BURNISHED STEEL
+MEDIUM ANODIZED COPPER
+MEDIUM BURNISHED BRASS
+MEDIUM BURNISHED COPPER
+MEDIUM BURNISHED TIN
+MEDIUM BURNISHED TIN
+PROMO ANODIZED TIN
+PROMO BURNISHED NICKEL
+PROMO PLATED TIN
+PROMO PLATED TIN
+PROMO POLISHED STEEL
+SMALL BRUSHED BRASS
+SMALL PLATED BRASS
+SMALL PLATED STEEL
+SMALL POLISHED NICKEL
+STANDARD ANODIZED STEEL
+STANDARD ANODIZED TIN
+STANDARD BURNISHED TIN
+STANDARD PLATED TIN
+STANDARD POLISHED STEEL

http://git-wip-us.apache.org/repos/asf/hive/blob/f63dc2d4/ql/src/test/results/clientpositive/llap/lineage3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/lineage3.q.out b/ql/src/test/results/clientpositive/llap/lineage3.q.out
index f092967..495ad09 100644
--- a/ql/src/test/results/clientpositive/llap/lineage3.q.out
+++ b/ql/src/test/results/clientpositive/llap/lineage3.q.out
@@ -178,7 +178,7 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@alltypesorc
 PREHOOK: Input: default@src1
 #### A masked pattern was here ####
-{"version":"1.0","engine":"tez","database":"default","hash":"8bf193b0658183be94e2428a79d91d10","queryText":"select * from src1 a\nwhere exists\n  (select cint from alltypesorc b\n   where a.key = b.ctinyint + 300)\nand key > 300","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(a.key) > 300.0)","edgeType":"PREDICATE"},{"sources":[2],"targets":[0,1],"expression":"(a.key = a.key)","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(UDFToDouble((UDFToInteger(b.ctinyint) + 300)) = UDFToDouble(a.key))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"a.key"},{"id":1,"vertexType":"COLUMN","vertexId":"a.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"}]}
+{"version":"1.0","engine":"tez","database":"default","hash":"8bf193b0658183be94e2428a79d91d10","queryText":"select * from src1 a\nwhere exists\n  (select cint from alltypesorc b\n   where a.key = b.ctinyint + 300)\nand key > 300","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(a.key) > 300.0)","edgeType":"PREDICATE"},{"sources":[2],"targets":[0,1],"expression":"(a.key = (. (tok_table_or_col $hdt$_1) key))","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(UDFToDouble((UDFToInteger(b.ctinyint) + 300)) = UDFToDouble(a.key))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"a.key"},{"id":1,"vertexType":"COLUMN","vertexId":"a.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"}]}
 311	val_311
 Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
 PREHOOK: query: select key, value from src1