Posted to commits@hive.apache.org by ha...@apache.org on 2016/12/16 18:28:14 UTC
[01/21] hive git commit: HIVE-15192 : Use Calcite to de-correlate and plan subqueries (Vineet Garg via Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master 2ae78f01b -> 382dc2084
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out b/ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out
index 3468657..af42e41 100644
--- a/ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out
@@ -1,4 +1,4 @@
-Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Stage-3:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[35][bigTable=?] in task 'Stage-8:MAPRED' is a cross product
PREHOOK: query: -- HIVE-12738 -- We are checking if a MapJoin after a GroupBy will work properly.
explain
select *
@@ -17,8 +17,14 @@ order by key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-4 is a root stage
- Stage-8 depends on stages: Stage-4
- Stage-3 depends on stages: Stage-8
+ Stage-10 depends on stages: Stage-4
+ Stage-8 depends on stages: Stage-10
+ Stage-7 depends on stages: Stage-5, Stage-8 , consists of Stage-9, Stage-2
+ Stage-9 has a backup stage: Stage-2
+ Stage-6 depends on stages: Stage-9
+ Stage-3 depends on stages: Stage-2, Stage-6
+ Stage-2
+ Stage-5 is a root stage
Stage-0 depends on stages: Stage-3
STAGE PLANS:
@@ -28,47 +34,38 @@ STAGE PLANS:
TableScan
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(), count(key)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
Reduce Operator Tree:
Group By Operator
- aggregations: count(VALUE._col0)
+ aggregations: count(VALUE._col0), count(VALUE._col1)
mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (_col0 = 0) (type: boolean)
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- Stage: Stage-8
+ Stage: Stage-10
Map Reduce Local Work
Alias -> Map Local Tables:
$hdt$_0:src
Fetch Operator
limit: -1
- $hdt$_2:src
- Fetch Operator
- limit: -1
Alias -> Map Local Operator Tree:
$hdt$_0:src
TableScan
@@ -82,20 +79,8 @@ STAGE PLANS:
keys:
0
1
- $hdt$_2:src
- TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- HashTable Sink Operator
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- Stage: Stage-3
+ Stage: Stage-8
Map Reduce
Map Operator Tree:
TableScan
@@ -105,50 +90,164 @@ STAGE PLANS:
keys:
0
1
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 9812 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Left Outer Join0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 550 Data size: 10793 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: _col3 is null (type: boolean)
- Statistics: Num rows: 275 Data size: 5396 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 275 Data size: 5396 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Statistics: Num rows: 275 Data size: 5396 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 500 Data size: 13812 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-9
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $INTNAME1
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $INTNAME1
+ TableScan
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col5
+ Statistics: Num rows: 550 Data size: 15193 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END) (type: boolean)
+ Statistics: Num rows: 275 Data size: 7596 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 275 Data size: 7596 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Local Work:
Map Reduce Local Work
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 275 Data size: 7596 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 275 Data size: 5396 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 275 Data size: 7596 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 275 Data size: 5396 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 275 Data size: 7596 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 13812 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: bigint), _col3 (type: bigint)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: boolean)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col5
+ Statistics: Num rows: 550 Data size: 15193 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END) (type: boolean)
+ Statistics: Num rows: 275 Data size: 7596 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 275 Data size: 7596 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), true (type: boolean)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: boolean)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: boolean)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
-Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Stage-3:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[35][bigTable=?] in task 'Stage-8:MAPRED' is a cross product
PREHOOK: query: select *
from src
where not key in
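The new plan above replaces the old `count() = 0` short-circuit with a `count(), count(key)` pair and a `CASE WHEN` filter, which is the standard count-based rewrite of `NOT IN` under SQL three-valued logic. A minimal sketch of that semantics (illustrative only, not Hive's implementation; the function name `not_in` and the Python representation of NULL as `None` are assumptions):

```python
def not_in(key, subquery_vals):
    # Mirrors the plan's aggregates over the subquery:
    #   _col2 = count()     -> total rows
    #   _col3 = count(key)  -> non-NULL rows
    c_star = len(subquery_vals)                        # count()
    c_val = sum(v is not None for v in subquery_vals)  # count(key)
    if c_star == 0:
        return True        # empty subquery: NOT IN is true
    if any(v == key for v in subquery_vals if v is not None):
        return False       # a match exists: NOT IN is false
    if key is None:
        return None        # NULL outer key: result is unknown
    if c_val < c_star:
        return None        # subquery contains NULLs: unknown
    return True

rows = ['a', None, 'b']
sub = ['a', None]
# WHERE keeps only rows where the predicate is strictly true,
# so every row here is filtered out once the subquery has a NULL.
kept = [r for r in rows if not_in(r, sub) is True]
```

This matches the `not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) ... WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END` predicate in the plan: unknown and true inside the `CASE` both cause the outer `not` to reject the row.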
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out b/ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out
index 160b088..b2b9a3b 100644
--- a/ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out
+++ b/ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out
@@ -23,19 +23,20 @@ where li.l_linenumber = 1 and
li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR')
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-7 depends on stages: Stage-4, Stage-8 , consists of Stage-9, Stage-10, Stage-2
- Stage-9 has a backup stage: Stage-2
- Stage-5 depends on stages: Stage-9
- Stage-10 has a backup stage: Stage-2
+ Stage-1 is a root stage
+ Stage-12 depends on stages: Stage-1
+ Stage-9 depends on stages: Stage-12
+ Stage-8 depends on stages: Stage-5, Stage-9 , consists of Stage-10, Stage-11, Stage-3
+ Stage-10 has a backup stage: Stage-3
Stage-6 depends on stages: Stage-10
- Stage-2
- Stage-11 is a root stage
- Stage-8 depends on stages: Stage-11
- Stage-0 depends on stages: Stage-5, Stage-6, Stage-2
+ Stage-11 has a backup stage: Stage-3
+ Stage-7 depends on stages: Stage-11
+ Stage-3
+ Stage-5 is a root stage
+ Stage-0 depends on stages: Stage-6, Stage-7, Stage-3
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-1
Map Reduce
Map Operator Tree:
TableScan
@@ -67,10 +68,54 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- Stage: Stage-7
- Conditional Operator
+ Stage: Stage-12
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $hdt$_1:li
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $hdt$_1:li
+ TableScan
+ alias: li
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((l_linenumber = 1) and l_partkey is not null) (type: boolean)
+ Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col1 (type: int)
Stage: Stage-9
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col1 (type: int)
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-8
+ Conditional Operator
+
+ Stage: Stage-10
Map Reduce Local Work
Alias -> Map Local Tables:
$INTNAME1
@@ -84,7 +129,7 @@ STAGE PLANS:
0 _col1 (type: int)
1 _col0 (type: int)
- Stage: Stage-5
+ Stage: Stage-6
Map Reduce
Map Operator Tree:
TableScan
@@ -94,10 +139,10 @@ STAGE PLANS:
keys:
0 _col1 (type: int)
1 _col0 (type: int)
- outputColumnNames: _col2, _col4
+ outputColumnNames: _col0, _col3
Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col4 (type: int), _col2 (type: int)
+ expressions: _col0 (type: int), _col3 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -110,7 +155,7 @@ STAGE PLANS:
Local Work:
Map Reduce Local Work
- Stage: Stage-10
+ Stage: Stage-11
Map Reduce Local Work
Alias -> Map Local Tables:
$INTNAME
@@ -124,7 +169,7 @@ STAGE PLANS:
0 _col1 (type: int)
1 _col0 (type: int)
- Stage: Stage-6
+ Stage: Stage-7
Map Reduce
Map Operator Tree:
TableScan
@@ -134,10 +179,10 @@ STAGE PLANS:
keys:
0 _col1 (type: int)
1 _col0 (type: int)
- outputColumnNames: _col2, _col4
+ outputColumnNames: _col0, _col3
Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col4 (type: int), _col2 (type: int)
+ expressions: _col0 (type: int), _col3 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -150,7 +195,7 @@ STAGE PLANS:
Local Work:
Map Reduce Local Work
- Stage: Stage-2
+ Stage: Stage-3
Map Reduce
Map Operator Tree:
TableScan
@@ -159,13 +204,13 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col1 (type: int)
Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: int)
+ value expressions: _col0 (type: int), _col3 (type: int)
TableScan
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Join Operator
condition map:
@@ -173,10 +218,10 @@ STAGE PLANS:
keys:
0 _col1 (type: int)
1 _col0 (type: int)
- outputColumnNames: _col2, _col4
+ outputColumnNames: _col0, _col3
Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col4 (type: int), _col2 (type: int)
+ expressions: _col0 (type: int), _col3 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -187,63 +232,41 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Stage: Stage-11
- Map Reduce Local Work
- Alias -> Map Local Tables:
- $hdt$_2:lineitem
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- $hdt$_2:lineitem
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
TableScan
alias: lineitem
Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean)
+ predicate: (l_shipmode = 'AIR') (type: boolean)
Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: l_orderkey (type: int)
- outputColumnNames: _col0
+ outputColumnNames: l_orderkey
Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- keys: _col0 (type: int)
+ keys: l_orderkey (type: int)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
- HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
-
- Stage: Stage-8
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: li
- Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((l_linenumber = 1) and l_partkey is not null and l_orderkey is not null) (type: boolean)
- Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Left Semi Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col1, _col2
- Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- Local Work:
- Map Reduce Local Work
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Fetch Operator
@@ -290,19 +313,24 @@ where li.l_linenumber = 1 and
li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-7 depends on stages: Stage-4, Stage-8 , consists of Stage-9, Stage-10, Stage-2
- Stage-9 has a backup stage: Stage-2
- Stage-5 depends on stages: Stage-9
- Stage-10 has a backup stage: Stage-2
- Stage-6 depends on stages: Stage-10
- Stage-2
- Stage-11 is a root stage
- Stage-8 depends on stages: Stage-11
- Stage-0 depends on stages: Stage-5, Stage-6, Stage-2
+ Stage-1 is a root stage
+ Stage-19 depends on stages: Stage-1
+ Stage-14 depends on stages: Stage-19
+ Stage-13 depends on stages: Stage-6, Stage-14 , consists of Stage-17, Stage-18, Stage-3
+ Stage-17 has a backup stage: Stage-3
+ Stage-11 depends on stages: Stage-17
+ Stage-18 has a backup stage: Stage-3
+ Stage-12 depends on stages: Stage-18
+ Stage-3
+ Stage-7 is a root stage
+ Stage-21 depends on stages: Stage-7
+ Stage-9 depends on stages: Stage-21
+ Stage-20 depends on stages: Stage-9
+ Stage-6 depends on stages: Stage-20
+ Stage-0 depends on stages: Stage-11, Stage-12, Stage-3
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-1
Map Reduce
Map Operator Tree:
TableScan
@@ -334,10 +362,54 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- Stage: Stage-7
+ Stage: Stage-19
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $hdt$_1:li
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $hdt$_1:li
+ TableScan
+ alias: li
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((l_linenumber = 1) and l_partkey is not null) (type: boolean)
+ Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int), 1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col1 (type: int)
+
+ Stage: Stage-14
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col1 (type: int)
+ outputColumnNames: _col0, _col1, _col3, _col4
+ Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-13
Conditional Operator
- Stage: Stage-9
+ Stage: Stage-17
Map Reduce Local Work
Alias -> Map Local Tables:
$INTNAME1
@@ -348,10 +420,10 @@ STAGE PLANS:
TableScan
HashTable Sink Operator
keys:
- 0 _col1 (type: int)
- 1 _col0 (type: int)
+ 0 _col1 (type: int), _col4 (type: int)
+ 1 _col0 (type: int), _col1 (type: int)
- Stage: Stage-5
+ Stage: Stage-11
Map Reduce
Map Operator Tree:
TableScan
@@ -359,12 +431,12 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 _col1 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col2, _col4
+ 0 _col1 (type: int), _col4 (type: int)
+ 1 _col0 (type: int), _col1 (type: int)
+ outputColumnNames: _col0, _col3
Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col4 (type: int), _col2 (type: int)
+ expressions: _col0 (type: int), _col3 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -377,7 +449,7 @@ STAGE PLANS:
Local Work:
Map Reduce Local Work
- Stage: Stage-10
+ Stage: Stage-18
Map Reduce Local Work
Alias -> Map Local Tables:
$INTNAME
@@ -388,10 +460,10 @@ STAGE PLANS:
TableScan
HashTable Sink Operator
keys:
- 0 _col1 (type: int)
- 1 _col0 (type: int)
+ 0 _col1 (type: int), _col4 (type: int)
+ 1 _col0 (type: int), _col1 (type: int)
- Stage: Stage-6
+ Stage: Stage-12
Map Reduce
Map Operator Tree:
TableScan
@@ -399,12 +471,12 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 _col1 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col2, _col4
+ 0 _col1 (type: int), _col4 (type: int)
+ 1 _col0 (type: int), _col1 (type: int)
+ outputColumnNames: _col0, _col3
Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col4 (type: int), _col2 (type: int)
+ expressions: _col0 (type: int), _col3 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -417,33 +489,33 @@ STAGE PLANS:
Local Work:
Map Reduce Local Work
- Stage: Stage-2
+ Stage: Stage-3
Map Reduce
Map Operator Tree:
TableScan
Reduce Output Operator
- key expressions: _col1 (type: int)
- sort order: +
- Map-reduce partition columns: _col1 (type: int)
+ key expressions: _col1 (type: int), _col4 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: int), _col4 (type: int)
Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: int)
+ value expressions: _col0 (type: int), _col3 (type: int)
TableScan
Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 30 Data size: 3629 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col1 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col2, _col4
+ 0 _col1 (type: int), _col4 (type: int)
+ 1 _col0 (type: int), _col1 (type: int)
+ outputColumnNames: _col0, _col3
Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col4 (type: int), _col2 (type: int)
+ expressions: _col0 (type: int), _col3 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -454,63 +526,157 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Stage: Stage-11
+ Stage: Stage-7
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: lineitem
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: l_partkey is not null (type: boolean)
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: l_partkey (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-21
Map Reduce Local Work
Alias -> Map Local Tables:
- $hdt$_2:lineitem
+ $hdt$_2:$hdt$_3:$hdt$_4:li
Fetch Operator
limit: -1
Alias -> Map Local Operator Tree:
- $hdt$_2:lineitem
+ $hdt$_2:$hdt$_3:$hdt$_4:li
TableScan
- alias: lineitem
+ alias: li
Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((l_shipmode = 'AIR') and (l_linenumber = 1) and l_orderkey is not null) (type: boolean)
- Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
+ predicate: l_partkey is not null (type: boolean)
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: l_orderkey (type: int), 1 (type: int)
+ expressions: l_partkey (type: int), l_linenumber (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: int), _col1 (type: int)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
- HashTable Sink Operator
- keys:
- 0 _col0 (type: int), 1 (type: int)
- 1 _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
- Stage: Stage-8
+ Stage: Stage-9
Map Reduce
Map Operator Tree:
TableScan
- alias: li
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2
+ Statistics: Num rows: 110 Data size: 13198 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col2 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 110 Data size: 13198 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 110 Data size: 13198 Basic stats: COMPLETE Column stats: NONE
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 55 Data size: 6599 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-20
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $hdt$_2:$hdt$_2:lineitem
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $hdt$_2:$hdt$_2:lineitem
+ TableScan
+ alias: lineitem
Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((l_linenumber = 1) and l_partkey is not null and l_orderkey is not null) (type: boolean)
+ predicate: (l_shipmode = 'AIR') (type: boolean)
Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int)
- outputColumnNames: _col0, _col1, _col2
+ expressions: l_orderkey (type: int), l_linenumber (type: int)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Left Semi Join 0 to 1
+ HashTable Sink Operator
keys:
- 0 _col0 (type: int), 1 (type: int)
- 1 _col0 (type: int), _col1 (type: int)
- outputColumnNames: _col1, _col2
- Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col3
+ Statistics: Num rows: 60 Data size: 7258 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int), _col3 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 60 Data size: 7258 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 60 Data size: 7258 Basic stats: COMPLETE Column stats: NONE
Local Work:
Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 30 Data size: 3629 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Fetch Operator
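The recurring pattern in these diffs is that the old plans used a Left Semi Join directly, while the Calcite-based de-correlation produces an extra Group By stage that deduplicates the subquery side and then joins with a plain Inner Join. The two forms return the same rows for an `IN (subquery)` predicate. A minimal sketch of that equivalence (illustrative only, with made-up sample data, not part of the test output):

```python
# Sample relations: 'rows' is the outer table, 'subq' is the subquery
# side, which may contain duplicate keys.
rows = [(1, 'a'), (2, 'b'), (3, 'c')]
subq = [1, 1, 3]

# Semi-join semantics: keep each outer row with at least one match.
semi = [r for r in rows if r[0] in set(subq)]

# De-correlated form: deduplicate the subquery side first (the extra
# Group By stage in the new plans), then use an ordinary inner join.
dedup = set(subq)
inner = [r for r in rows for k in dedup if r[0] == k]

# Dedup-then-inner-join yields exactly the semi-join result.
assert sorted(semi) == sorted(inner)
```

Without the dedup step, duplicate subquery keys would multiply outer rows under an inner join, which is why each rewritten plan adds the hash/mergepartial Group By before joining.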
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientpositive/masking_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/masking_3.q.out b/ql/src/test/results/clientpositive/masking_3.q.out
index 1925dce..2d8a79e 100644
--- a/ql/src/test/results/clientpositive/masking_3.q.out
+++ b/ql/src/test/results/clientpositive/masking_3.q.out
@@ -15,56 +15,141 @@ PREHOOK: type: QUERY
POSTHOOK: query: explain select * from masking_test_subq
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-4 is a root stage
+ Stage-2 depends on stages: Stage-4
+ Stage-3 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-3
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-1
+ Stage: Stage-4
Map Reduce
Map Operator Tree:
TableScan
alias: masking_test_subq
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: UDFToDouble(key) is not null (type: boolean)
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: key
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int), value (type: string), UDFToDouble(key) (type: double), UDFToDouble(key) (type: double)
- outputColumnNames: _col0, _col1, _col2, _col3
+ Group By Operator
+ keys: key (type: int)
+ mode: hash
+ outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col2 (type: double), _col3 (type: double)
- sort order: ++
- Map-reduce partition columns: _col2 (type: double), _col3 (type: double)
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: string)
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
TableScan
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: UDFToDouble(key) is not null (type: boolean)
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: UDFToDouble(key) (type: double), UDFToDouble(key) (type: double)
- outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: UDFToDouble(_col0) (type: double)
+ sort order: +
+ Map-reduce partition columns: UDFToDouble(_col0) (type: double)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: double), _col1 (type: double)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: double), _col1 (type: double)
- sort order: ++
- Map-reduce partition columns: _col0 (type: double), _col1 (type: double)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ TableScan
+ Reduce Output Operator
+ key expressions: UDFToDouble(_col0) (type: double)
+ sort order: +
+ Map-reduce partition columns: UDFToDouble(_col0) (type: double)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
Reduce Operator Tree:
Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
- 0 _col2 (type: double), _col3 (type: double)
- 1 _col0 (type: double), _col1 (type: double)
+ 0 UDFToDouble(_col0) (type: double)
+ 1 UDFToDouble(_col0) (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: masking_test_subq
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: UDFToDouble(_col0) (type: double), _col0 (type: int)
+ sort order: ++
+ Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col0 (type: int)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ TableScan
+ Reduce Output Operator
+ key expressions: UDFToDouble(_col0) (type: double), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col1 (type: int)
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 UDFToDouble(_col0) (type: double), _col0 (type: int)
+ 1 UDFToDouble(_col0) (type: double), _col1 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -596,10 +681,112 @@ PREHOOK: type: QUERY
POSTHOOK: query: explain select * from masking_test_subq where key > 0
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-4 is a root stage
+ Stage-2 depends on stages: Stage-4
+ Stage-3 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-3
Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: masking_test_subq
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: key
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: key (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: UDFToDouble(_col0) (type: double)
+ sort order: +
+ Map-reduce partition columns: UDFToDouble(_col0) (type: double)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ TableScan
+ Reduce Output Operator
+ key expressions: UDFToDouble(_col0) (type: double)
+ sort order: +
+ Map-reduce partition columns: UDFToDouble(_col0) (type: double)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 UDFToDouble(_col0) (type: double)
+ 1 UDFToDouble(_col0) (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
Stage: Stage-1
Map Reduce
Map Operator Tree:
@@ -607,50 +794,36 @@ STAGE PLANS:
alias: masking_test_subq
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key > 0) and UDFToDouble(key) is not null) (type: boolean)
+ predicate: (key > 0) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: int), value (type: string), UDFToDouble(key) (type: double), UDFToDouble(key) (type: double)
- outputColumnNames: _col0, _col1, _col2, _col3
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col2 (type: double), _col3 (type: double)
+ key expressions: UDFToDouble(_col0) (type: double), _col0 (type: int)
sort order: ++
- Map-reduce partition columns: _col2 (type: double), _col3 (type: double)
+ Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col0 (type: int)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: string)
+ value expressions: _col1 (type: string)
TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: UDFToDouble(key) is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: UDFToDouble(key) (type: double), UDFToDouble(key) (type: double)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: double), _col1 (type: double)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: double), _col1 (type: double)
- sort order: ++
- Map-reduce partition columns: _col0 (type: double), _col1 (type: double)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: UDFToDouble(_col0) (type: double), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col1 (type: int)
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
- 0 _col2 (type: double), _col3 (type: double)
- 1 _col0 (type: double), _col1 (type: double)
+ 0 UDFToDouble(_col0) (type: double), _col0 (type: int)
+ 1 UDFToDouble(_col0) (type: double), _col1 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1174,10 +1347,112 @@ PREHOOK: type: QUERY
POSTHOOK: query: explain select key from masking_test_subq where key > 0
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-4 is a root stage
+ Stage-2 depends on stages: Stage-4
+ Stage-3 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-3
Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: masking_test_subq
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: key
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: key (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: UDFToDouble(_col0) (type: double)
+ sort order: +
+ Map-reduce partition columns: UDFToDouble(_col0) (type: double)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ TableScan
+ Reduce Output Operator
+ key expressions: UDFToDouble(_col0) (type: double)
+ sort order: +
+ Map-reduce partition columns: UDFToDouble(_col0) (type: double)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 UDFToDouble(_col0) (type: double)
+ 1 UDFToDouble(_col0) (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
Stage: Stage-1
Map Reduce
Map Operator Tree:
@@ -1185,50 +1460,35 @@ STAGE PLANS:
alias: masking_test_subq
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key > 0) and UDFToDouble(key) is not null) (type: boolean)
+ predicate: (key > 0) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: int), UDFToDouble(key) (type: double), UDFToDouble(key) (type: double)
- outputColumnNames: _col0, _col1, _col2
+ expressions: key (type: int)
+ outputColumnNames: _col0
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col1 (type: double), _col2 (type: double)
+ key expressions: UDFToDouble(_col0) (type: double), _col0 (type: int)
sort order: ++
- Map-reduce partition columns: _col1 (type: double), _col2 (type: double)
+ Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col0 (type: int)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int)
TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: UDFToDouble(key) is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: UDFToDouble(key) (type: double), UDFToDouble(key) (type: double)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: double), _col1 (type: double)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: double), _col1 (type: double)
- sort order: ++
- Map-reduce partition columns: _col0 (type: double), _col1 (type: double)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: UDFToDouble(_col0) (type: double), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col1 (type: int)
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
- 0 _col1 (type: double), _col2 (type: double)
- 1 _col0 (type: double), _col1 (type: double)
+ 0 UDFToDouble(_col0) (type: double), _col0 (type: int)
+ 1 UDFToDouble(_col0) (type: double), _col1 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1752,65 +2012,157 @@ PREHOOK: type: QUERY
POSTHOOK: query: explain select value from masking_test_subq where key > 0
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-4 is a root stage
+ Stage-2 depends on stages: Stage-4
+ Stage-3 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-3
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-1
+ Stage: Stage-4
Map Reduce
Map Operator Tree:
TableScan
alias: masking_test_subq
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((key > 0) and UDFToDouble(key) is not null) (type: boolean)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: value (type: string), UDFToDouble(key) (type: double), UDFToDouble(key) (type: double)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: key
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: key (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col1 (type: double), _col2 (type: double)
- sort order: ++
- Map-reduce partition columns: _col1 (type: double), _col2 (type: double)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string)
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
TableScan
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: UDFToDouble(key) is not null (type: boolean)
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: UDFToDouble(key) (type: double), UDFToDouble(key) (type: double)
- outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: UDFToDouble(_col0) (type: double)
+ sort order: +
+ Map-reduce partition columns: UDFToDouble(_col0) (type: double)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: double), _col1 (type: double)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: double), _col1 (type: double)
- sort order: ++
- Map-reduce partition columns: _col0 (type: double), _col1 (type: double)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ TableScan
+ Reduce Output Operator
+ key expressions: UDFToDouble(_col0) (type: double)
+ sort order: +
+ Map-reduce partition columns: UDFToDouble(_col0) (type: double)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
Reduce Operator Tree:
Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
- 0 _col1 (type: double), _col2 (type: double)
- 1 _col0 (type: double), _col1 (type: double)
- outputColumnNames: _col0
+ 0 UDFToDouble(_col0) (type: double)
+ 1 UDFToDouble(_col0) (type: double)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: masking_test_subq
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key > 0) (type: boolean)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: UDFToDouble(_col0) (type: double), _col0 (type: int)
+ sort order: ++
+ Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col0 (type: int)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ TableScan
+ Reduce Output Operator
+ key expressions: UDFToDouble(_col0) (type: double), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col1 (type: int)
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 UDFToDouble(_col0) (type: double), _col0 (type: int)
+ 1 UDFToDouble(_col0) (type: double), _col1 (type: int)
+ outputColumnNames: _col1
+ Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -2330,57 +2682,145 @@ PREHOOK: type: QUERY
POSTHOOK: query: explain select * from masking_test_subq join srcpart on (masking_test_subq.key = srcpart.key)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-5 is a root stage
+ Stage-3 depends on stages: Stage-5
+ Stage-4 depends on stages: Stage-3
+ Stage-1 depends on stages: Stage-4
Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
STAGE PLANS:
- Stage: Stage-1
+ Stage: Stage-5
Map Reduce
Map Operator Tree:
TableScan
alias: masking_test_subq
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (UDFToDouble(key) is not null and key is not null) (type: boolean)
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: key
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int), value (type: string), UDFToDouble(key) (type: double), UDFToDouble(key) (type: double)
- outputColumnNames: _col0, _col1, _col2, _col3
+ Group By Operator
+ keys: key (type: int)
+ mode: hash
+ outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col2 (type: double), _col3 (type: double)
- sort order: ++
- Map-reduce partition columns: _col2 (type: double), _col3 (type: double)
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: string)
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
TableScan
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: UDFToDouble(_col0) (type: double)
+ sort order: +
+ Map-reduce partition columns: UDFToDouble(_col0) (type: double)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ TableScan
+ Reduce Output Operator
+ key expressions: UDFToDouble(_col0) (type: double)
+ sort order: +
+ Map-reduce partition columns: UDFToDouble(_col0) (type: double)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 UDFToDouble(_col0) (type: double)
+ 1 UDFToDouble(_col0) (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: masking_test_subq
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: UDFToDouble(key) is not null (type: boolean)
+ predicate: key is not null (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: UDFToDouble(key) (type: double), UDFToDouble(key) (type: double)
+ expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: double), _col1 (type: double)
- mode: hash
- outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: UDFToDouble(_col0) (type: double), _col0 (type: int)
+ sort order: ++
+ Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col0 (type: int)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: double), _col1 (type: double)
- sort order: ++
- Map-reduce partition columns: _col0 (type: double), _col1 (type: double)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ TableScan
+ Reduce Output Operator
+ key expressions: UDFToDouble(_col0) (type: double), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col1 (type: int)
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
- 0 _col2 (type: double), _col3 (type: double)
- 1 _col0 (type: double), _col1 (type: double)
+ 0 UDFToDouble(_col0) (type: double), _col0 (type: int)
+ 1 UDFToDouble(_col0) (type: double), _col1 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -6576,10 +7016,112 @@ PREHOOK: type: QUERY
POSTHOOK: query: explain select * from default.masking_test_subq where key > 0
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-4 is a root stage
+ Stage-2 depends on stages: Stage-4
+ Stage-3 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-3
Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: masking_test_subq
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: key
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: key (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: UDFToDouble(_col0) (type: double)
+ sort order: +
+ Map-reduce partition columns: UDFToDouble(_col0) (type: double)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ TableScan
+ Reduce Output Operator
+ key expressions: UDFToDouble(_col0) (type: double)
+ sort order: +
+ Map-reduce partition columns: UDFToDouble(_col0) (type: double)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 UDFToDouble(_col0) (type: double)
+ 1 UDFToDouble(_col0) (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
Stage: Stage-1
Map Reduce
Map Operator Tree:
@@ -6587,50 +7129,36 @@ STAGE PLANS:
alias: masking_test_subq
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key > 0) and UDFToDouble(key) is not null) (type: boolean)
+ predicate: (key > 0) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: int), value (type: string), UDFToDouble(key) (type: double), UDFToDouble(key) (type: double)
- outputColumnNames: _col0, _col1, _col2, _col3
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col2 (type: double), _col3 (type: double)
+ key expressions: UDFToDouble(_col0) (type: double), _col0 (type: int)
sort order: ++
- Map-reduce partition columns: _col2 (type: double), _col3 (type: double)
+ Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col0 (type: int)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: string)
+ value expressions: _col1 (type: string)
TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: UDFToDouble(key) is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: UDFToDouble(key) (type: double), UDFToDouble(key) (type: double)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: double), _col1 (type: double)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: double), _col1 (type: double)
- sort order: ++
- Map-reduce partition columns: _col0 (type: double), _col1 (type: double)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: UDFToDouble(_col0) (type: double), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col1 (type: int)
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
- 0 _col2 (type: double), _col3 (type: double)
- 1 _col0 (type: double), _col1 (type: double)
+ 0 UDFToDouble(_col0) (type: double), _col0 (type: int)
+ 1 UDFToDouble(_col0) (type: double), _col1 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -7154,10 +7682,112 @@ PREHOOK: type: QUERY
POSTHOOK: query: explain select * from masking_test_subq where masking_test_subq.key > 0
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-4 is a root stage
+ Stage-2 depends on stages: Stage-4
+ Stage-3 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-3
Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: masking_test_subq
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: key
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: key (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: UDFToDouble(_col0) (type: double)
+ sort order: +
+ Map-reduce partition columns: UDFToDouble(_col0) (type: double)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ TableScan
+ Reduce Output Operator
+ key expressions: UDFToDouble(_col0) (type: double)
+ sort order: +
+ Map-reduce partition columns: UDFToDouble(_col0) (type: double)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 UDFToDouble(_col0) (type: double)
+ 1 UDFToDouble(_col0) (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
Stage: Stage-1
Map Reduce
Map Operator Tree:
@@ -7165,50 +7795,36 @@ STAGE PLANS:
alias: masking_test_subq
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key > 0) and UDFToDouble(key) is not null) (type: boolean)
+ predicate: (key > 0) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: int), value (type: string), UDFToDouble(key) (type: double), UDFToDouble(key) (type: double)
- outputColumnNames: _col0, _col1, _col2, _col3
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col2 (type: double), _col3 (type: double)
+ key expressions: UDFToDouble(_col0) (type: double), _col0 (type: int)
sort order: ++
- Map-reduce partition columns: _col2 (type: double), _col3 (type: double)
+ Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col0 (type: int)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: string)
+ value expressions: _col1 (type: string)
TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: UDFToDouble(key) is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: UDFToDouble(key) (type: double), UDFToDouble(key) (type: double)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: double), _col1 (type: double)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: double), _col1 (type: double)
- sort order: ++
- Map-reduce partition columns: _col0 (type: double), _col1 (type: double)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: UDFToDouble(_col0) (type: double), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col1 (type: int)
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
- 0 _col2 (type: double), _col3 (type: double)
- 1 _col0 (type: double), _col1 (type: double)
+ 0 UDFToDouble(_col0) (type: double), _col0 (type: int)
+ 1 UDFToDouble(_col0) (type: double), _col1 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientpositive/masking_4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/masking_4.q.out b/ql/src/test/results/clientpositive/masking_4.q.out
index 7e923e8..0a81c44 100644
--- a/ql/src/test/results/clientpositive/masking_4.q.out
+++ b/ql/src/test/results/clientpositive/masking_4.q.out
@@ -165,56 +165,141 @@ with q1 as ( select * from masking_test where key = '5')
select * from masking_test_subq
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-4 is a root stage
+ Stage-2 depends on stages: Stage-4
+ Stage-3 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-3
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-1
+ Stage: Stage-4
Map Reduce
Map Operator Tree:
TableScan
alias: masking_test_subq
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: UDFToDouble(key) is not null (type: boolean)
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: key
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int), value (type: string), UDFToDouble(key) (type: double), UDFToDouble(key) (type: double)
- outputColumnNames: _col0, _col1, _col2, _col3
+ Group By Operator
+ keys: key (type: int)
+ mode: hash
+ outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col2 (type: double), _col3 (type: double)
- sort order: ++
- Map-reduce partition columns: _col2 (type: double), _col3 (type: double)
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: string)
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
TableScan
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: UDFToDouble(key) is not null (type: boolean)
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: UDFToDouble(key) (type: double), UDFToDouble(key) (type: double)
- outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: UDFToDouble(_col0) (type: double)
+ sort order: +
+ Map-reduce partition columns: UDFToDouble(_col0) (type: double)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: double), _col1 (type: double)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: double), _col1 (type: double)
- sort order: ++
- Map-reduce partition columns: _col0 (type: double), _col1 (type: double)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ TableScan
+ Reduce Output Operator
+ key expressions: UDFToDouble(_col0) (type: double)
+ sort order: +
+ Map-reduce partition columns: UDFToDouble(_col0) (type: double)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 UDFToDouble(_col0) (type: double)
+ 1 UDFToDouble(_col0) (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: masking_test_subq
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: UDFToDouble(_col0) (type: double), _col0 (type: int)
+ sort order: ++
+ Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col0 (type: int)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ TableScan
+ Reduce Output Operator
+ key expressions: UDFToDouble(_col0) (type: double), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col1 (type: int)
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
- 0 _col2 (type: double), _col3 (type: double)
- 1 _col0 (type: double), _col1 (type: double)
+ 0 UDFToDouble(_col0) (type: double), _col0 (type: int)
+ 1 UDFToDouble(_col0) (type: double), _col1 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
File Output Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientpositive/perf/query45.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query45.q.out b/ql/src/test/results/clientpositive/perf/query45.q.out
index 7bc137c..e2d0da5 100644
--- a/ql/src/test/results/clientpositive/perf/query45.q.out
+++ b/ql/src/test/results/clientpositive/perf/query45.q.out
@@ -5,111 +5,117 @@ POSTHOOK: type: QUERY
Plan optimized by CBO.
Vertex dependency in root stage
-Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE)
+Reducer 11 <- Map 10 (SIMPLE_EDGE)
+Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE)
Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
-Reducer 8 <- Map 10 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
-Reducer 9 <- Reducer 12 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+Reducer 8 <- Map 7 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE)
+Reducer 9 <- Reducer 13 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
Stage-0
Fetch Operator
limit:100
Stage-1
Reducer 5
- File Output Operator [FS_45]
- Limit [LIM_44] (rows=100 width=135)
+ File Output Operator [FS_47]
+ Limit [LIM_46] (rows=100 width=135)
Number of rows:100
- Select Operator [SEL_43] (rows=95833781 width=135)
+ Select Operator [SEL_45] (rows=95833781 width=135)
Output:["_col0","_col1","_col2"]
<-Reducer 4 [SIMPLE_EDGE]
- SHUFFLE [RS_42]
- Group By Operator [GBY_40] (rows=95833781 width=135)
+ SHUFFLE [RS_44]
+ Group By Operator [GBY_42] (rows=95833781 width=135)
Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1
<-Reducer 3 [SIMPLE_EDGE]
- SHUFFLE [RS_39]
+ SHUFFLE [RS_41]
PartitionCols:_col0, _col1
- Group By Operator [GBY_38] (rows=191667562 width=135)
- Output:["_col0","_col1","_col2"],aggregations:["sum(_col10)"],keys:_col4, _col3
- Select Operator [SEL_37] (rows=191667562 width=135)
- Output:["_col4","_col3","_col10"]
- Merge Join Operator [MERGEJOIN_72] (rows=191667562 width=135)
- Conds:RS_34._col0=RS_35._col4(Inner),Output:["_col3","_col4","_col10"]
+ Group By Operator [GBY_40] (rows=191667562 width=135)
+ Output:["_col0","_col1","_col2"],aggregations:["sum(_col11)"],keys:_col4, _col3
+ Select Operator [SEL_39] (rows=191667562 width=135)
+ Output:["_col4","_col3","_col11"]
+ Merge Join Operator [MERGEJOIN_74] (rows=191667562 width=135)
+ Conds:RS_36._col0=RS_37._col5(Inner),Output:["_col3","_col4","_col11"]
<-Reducer 2 [SIMPLE_EDGE]
- SHUFFLE [RS_34]
+ SHUFFLE [RS_36]
PartitionCols:_col0
- Merge Join Operator [MERGEJOIN_68] (rows=88000001 width=860)
- Conds:RS_31._col1=RS_32._col0(Inner),Output:["_col0","_col3","_col4"]
+ Merge Join Operator [MERGEJOIN_70] (rows=88000001 width=860)
+ Conds:RS_33._col1=RS_34._col0(Inner),Output:["_col0","_col3","_col4"]
<-Map 1 [SIMPLE_EDGE]
- SHUFFLE [RS_31]
+ SHUFFLE [RS_33]
PartitionCols:_col1
Select Operator [SEL_2] (rows=80000000 width=860)
Output:["_col0","_col1"]
- Filter Operator [FIL_62] (rows=80000000 width=860)
+ Filter Operator [FIL_64] (rows=80000000 width=860)
predicate:(c_customer_sk is not null and c_current_addr_sk is not null)
TableScan [TS_0] (rows=80000000 width=860)
default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"]
<-Map 6 [SIMPLE_EDGE]
- SHUFFLE [RS_32]
+ SHUFFLE [RS_34]
PartitionCols:_col0
Select Operator [SEL_5] (rows=40000000 width=1014)
Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_63] (rows=40000000 width=1014)
+ Filter Operator [FIL_65] (rows=40000000 width=1014)
predicate:ca_address_sk is not null
TableScan [TS_3] (rows=40000000 width=1014)
default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_county","ca_zip"]
<-Reducer 9 [SIMPLE_EDGE]
- SHUFFLE [RS_35]
- PartitionCols:_col4
- Merge Join Operator [MERGEJOIN_71] (rows=174243235 width=135)
- Conds:RS_27._col0=RS_28._col1(Inner),Output:["_col4","_col5"]
- <-Reducer 12 [SIMPLE_EDGE]
- SHUFFLE [RS_28]
+ SHUFFLE [RS_37]
+ PartitionCols:_col5
+ Merge Join Operator [MERGEJOIN_73] (rows=174243235 width=135)
+ Conds:RS_29._col0=RS_30._col1(Inner),Output:["_col5","_col6"]
+ <-Reducer 13 [SIMPLE_EDGE]
+ SHUFFLE [RS_30]
PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_70] (rows=158402938 width=135)
- Conds:RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col2","_col3"]
- <-Map 11 [SIMPLE_EDGE]
- SHUFFLE [RS_18]
+ Merge Join Operator [MERGEJOIN_72] (rows=158402938 width=135)
+ Conds:RS_22._col0=RS_23._col0(Inner),Output:["_col1","_col2","_col3"]
+ <-Map 12 [SIMPLE_EDGE]
+ SHUFFLE [RS_22]
PartitionCols:_col0
- Select Operator [SEL_14] (rows=144002668 width=135)
+ Select Operator [SEL_18] (rows=144002668 width=135)
Output:["_col0","_col1","_col2","_col3"]
- Filter Operator [FIL_66] (rows=144002668 width=135)
+ Filter Operator [FIL_68] (rows=144002668 width=135)
predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_item_sk is not null)
- TableScan [TS_12] (rows=144002668 width=135)
+ TableScan [TS_16] (rows=144002668 width=135)
default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk","ws_sales_price"]
- <-Map 13 [SIMPLE_EDGE]
- SHUFFLE [RS_19]
+ <-Map 14 [SIMPLE_EDGE]
+ SHUFFLE [RS_23]
PartitionCols:_col0
- Select Operator [SEL_17] (rows=18262 width=1119)
+ Select Operator [SEL_21] (rows=18262 width=1119)
Output:["_col0"]
- Filter Operator [FIL_67] (rows=18262 width=1119)
+ Filter Operator [FIL_69] (rows=18262 width=1119)
predicate:((d_qoy = 2) and (d_year = 2000) and d_date_sk is not null)
- TableScan [TS_15] (rows=73049 width=1119)
+ TableScan [TS_19] (rows=73049 width=1119)
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_qoy"]
<-Reducer 8 [SIMPLE_EDGE]
- SHUFFLE [RS_27]
+ SHUFFLE [RS_29]
PartitionCols:_col0
- Merge Join Operator [MERGEJOIN_69] (rows=508200 width=1436)
- Conds:RS_24._col1=RS_25._col0(Left Semi),Output:["_col0"]
- <-Map 10 [SIMPLE_EDGE]
- SHUFFLE [RS_25]
- PartitionCols:_col0
- Group By Operator [GBY_23] (rows=231000 width=1436)
- Output:["_col0"],keys:_col0
- Select Operator [SEL_11] (rows=231000 width=1436)
- Output:["_col0"]
- Filter Operator [FIL_65] (rows=231000 width=1436)
- predicate:((i_item_sk) IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) and i_item_id is not null)
- TableScan [TS_9] (rows=462000 width=1436)
- default@item,i2,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"]
+ Merge Join Operator [MERGEJOIN_71] (rows=508200 width=1436)
+ Conds:RS_26._col1=RS_27._col0(Inner),Output:["_col0"]
<-Map 7 [SIMPLE_EDGE]
- SHUFFLE [RS_24]
+ SHUFFLE [RS_26]
PartitionCols:_col1
Select Operator [SEL_8] (rows=462000 width=1436)
Output:["_col0","_col1"]
- Filter Operator [FIL_64] (rows=462000 width=1436)
- predicate:(i_item_sk is not null and i_item_id is not null)
+ Filter Operator [FIL_66] (rows=462000 width=1436)
+ predicate:i_item_sk is not null
TableScan [TS_6] (rows=462000 width=1436)
default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"]
+ <-Reducer 11 [SIMPLE_EDGE]
+ SHUFFLE [RS_27]
+ PartitionCols:_col0
+ Group By Operator [GBY_14] (rows=115500 width=1436)
+ Output:["_col0"],keys:KEY._col0
+ <-Map 10 [SIMPLE_EDGE]
+ SHUFFLE [RS_13]
+ PartitionCols:_col0
+ Group By Operator [GBY_12] (rows=231000 width=1436)
+ Output:["_col0"],keys:i_item_id
+ Select Operator [SEL_11] (rows=231000 width=1436)
+ Output:["i_item_id"]
+ Filter Operator [FIL_67] (rows=231000 width=1436)
+ predicate:(i_item_sk) IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29)
+ TableScan [TS_9] (rows=462000 width=1436)
+ default@item,i2,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"]
[17/21] hive git commit: HIVE-15192 : Use Calcite to de-correlate and plan subqueries (Vineet Garg via Ashutosh Chauhan)
Posted by ha...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/queries/clientpositive/subquery_notin_having.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/subquery_notin_having.q b/ql/src/test/queries/clientpositive/subquery_notin_having.q
index 05148df..2cd0bec 100644
--- a/ql/src/test/queries/clientpositive/subquery_notin_having.q
+++ b/ql/src/test/queries/clientpositive/subquery_notin_having.q
@@ -56,3 +56,19 @@ having b.p_mfgr not in
having max(p_retailprice) - min(p_retailprice) > 600
)
;
+
+--nullability tests
+CREATE TABLE t1 (c1 INT, c2 CHAR(100));
+INSERT INTO t1 VALUES (null,null), (1,''), (2,'abcde'), (100,'abcdefghij');
+
+CREATE TABLE t2 (c1 INT);
+INSERT INTO t2 VALUES (null), (2), (100);
+
+explain SELECT c1 FROM t1 group by c1 having c1 NOT IN (SELECT c1 FROM t2);
+SELECT c1 FROM t1 group by c1 having c1 NOT IN (SELECT c1 FROM t2);
+
+explain SELECT c1 FROM t1 group by c1 having c1 NOT IN (SELECT c1 FROM t2 where t1.c1=t2.c1);
+SELECT c1 FROM t1 group by c1 having c1 NOT IN (SELECT c1 FROM t2 where t1.c1=t2.c1);
+
+DROP TABLE t1;
+DROP TABLE t2;
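The nullability tests added above hinge on standard three-valued `NOT IN` semantics: a NULL on the right-hand side makes every non-matching comparison UNKNOWN, while a correlated equality predicate filters that NULL out. As a sketch, the same behavior can be reproduced outside Hive with SQLite, which follows the same semantics (the `CHAR(100)` column is mapped to `TEXT` here):

```python
import sqlite3

# Reproduce the t1/t2 nullability tests from the .q file above.
conn = sqlite3.connect(":memory:")
cur = conn.cursor()
cur.execute("CREATE TABLE t1 (c1 INT, c2 TEXT)")
cur.executemany("INSERT INTO t1 VALUES (?, ?)",
                [(None, None), (1, ""), (2, "abcde"), (100, "abcdefghij")])
cur.execute("CREATE TABLE t2 (c1 INT)")
cur.executemany("INSERT INTO t2 VALUES (?)", [(None,), (2,), (100,)])

# Uncorrelated: the NULL in t2 makes every non-matching comparison
# UNKNOWN, so no group passes the HAVING predicate.
uncorrelated = cur.execute(
    "SELECT c1 FROM t1 GROUP BY c1 "
    "HAVING c1 NOT IN (SELECT c1 FROM t2)").fetchall()

# Correlated: the predicate t1.c1 = t2.c1 filters the NULL row out of
# the subquery, so the unmatched groups (NULL and 1) survive.
correlated = cur.execute(
    "SELECT c1 FROM t1 GROUP BY c1 "
    "HAVING c1 NOT IN (SELECT c1 FROM t2 WHERE t1.c1 = t2.c1)").fetchall()
```

Here `uncorrelated` comes back empty, while `correlated` contains the NULL group and the group for 1.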
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientnegative/subquery_corr_from.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/subquery_corr_from.q.out b/ql/src/test/results/clientnegative/subquery_corr_from.q.out
new file mode 100644
index 0000000..663a8bb
--- /dev/null
+++ b/ql/src/test/results/clientnegative/subquery_corr_from.q.out
@@ -0,0 +1 @@
+FAILED: SemanticException [Error 10004]: Line 3:113 Invalid table alias or column reference 'po': (possible column names are: p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment)
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientnegative/subquery_corr_grandparent.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/subquery_corr_grandparent.q.out b/ql/src/test/results/clientnegative/subquery_corr_grandparent.q.out
new file mode 100644
index 0000000..fb72270
--- /dev/null
+++ b/ql/src/test/results/clientnegative/subquery_corr_grandparent.q.out
@@ -0,0 +1 @@
+FAILED: SemanticException [Error 10249]: Line 4:53 Unsupported SubQuery Expression 'p_name': SubQuery expression refers to both Parent and SubQuery expressions and is not a valid join condition.
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientnegative/subquery_corr_select.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/subquery_corr_select.q.out b/ql/src/test/results/clientnegative/subquery_corr_select.q.out
new file mode 100644
index 0000000..1f3dbb7
--- /dev/null
+++ b/ql/src/test/results/clientnegative/subquery_corr_select.q.out
@@ -0,0 +1 @@
+FAILED: SemanticException [Error 10004]: Line 2:54 Invalid table alias or column reference 'po': (possible column names are: p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment)
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientnegative/subquery_in_groupby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/subquery_in_groupby.q.out b/ql/src/test/results/clientnegative/subquery_in_groupby.q.out
index 809bb0a..a546d49 100644
--- a/ql/src/test/results/clientnegative/subquery_in_groupby.q.out
+++ b/ql/src/test/results/clientnegative/subquery_in_groupby.q.out
@@ -1 +1 @@
-FAILED: SemanticException [Error 10249]: Line 5:37 Unsupported SubQuery Expression ''9'': Currently SubQuery expressions are only allowed as Where Clause predicates
+FAILED: SemanticException [Error 10249]: Unsupported SubQuery Expression Currently SubQuery expressions are only allowed as Where and Having Clause predicates
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientnegative/subquery_in_select.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/subquery_in_select.q.out b/ql/src/test/results/clientnegative/subquery_in_select.q.out
index 3d74132..a546d49 100644
--- a/ql/src/test/results/clientnegative/subquery_in_select.q.out
+++ b/ql/src/test/results/clientnegative/subquery_in_select.q.out
@@ -1 +1 @@
-FAILED: SemanticException [Error 10249]: Line 4:35 Unsupported SubQuery Expression ''9'': Currently SubQuery expressions are only allowed as Where Clause predicates
+FAILED: SemanticException [Error 10249]: Unsupported SubQuery Expression Currently SubQuery expressions are only allowed as Where and Having Clause predicates
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientnegative/subquery_nested_subquery.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/subquery_nested_subquery.q.out b/ql/src/test/results/clientnegative/subquery_nested_subquery.q.out
deleted file mode 100644
index 140b093..0000000
--- a/ql/src/test/results/clientnegative/subquery_nested_subquery.q.out
+++ /dev/null
@@ -1 +0,0 @@
-FAILED: SemanticException [Error 10249]: Line 3:53 Unsupported SubQuery Expression 'p_name': Nested SubQuery expressions are not supported.
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientnegative/subquery_restrictions.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/subquery_restrictions.q.out b/ql/src/test/results/clientnegative/subquery_restrictions.q.out
new file mode 100644
index 0000000..a546d49
--- /dev/null
+++ b/ql/src/test/results/clientnegative/subquery_restrictions.q.out
@@ -0,0 +1 @@
+FAILED: SemanticException [Error 10249]: Unsupported SubQuery Expression Currently SubQuery expressions are only allowed as Where and Having Clause predicates
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientpositive/constant_prop_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/constant_prop_3.q.out b/ql/src/test/results/clientpositive/constant_prop_3.q.out
index 58f1065..066ed07 100644
--- a/ql/src/test/results/clientpositive/constant_prop_3.q.out
+++ b/ql/src/test/results/clientpositive/constant_prop_3.q.out
@@ -88,7 +88,7 @@ POSTHOOK: query: analyze table supplier_hive compute statistics for columns
POSTHOOK: type: QUERY
POSTHOOK: Input: default@supplier_hive
#### A masked pattern was here ####
-Warning: Shuffle Join JOIN[23][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[26][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
PREHOOK: query: explain select
p_brand,
p_type,
@@ -154,10 +154,11 @@ POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-2 depends on stages: Stage-1, Stage-7
- Stage-3 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-2, Stage-8
Stage-4 depends on stages: Stage-3
Stage-5 depends on stages: Stage-4
Stage-7 is a root stage
+ Stage-8 is a root stage
Stage-0 depends on stages: Stage-5
STAGE PLANS:
@@ -223,7 +224,8 @@ STAGE PLANS:
TableScan
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
Reduce Operator Tree:
Join Operator
condition map:
@@ -231,8 +233,8 @@ STAGE PLANS:
keys:
0
1
- outputColumnNames: _col1, _col3, _col4, _col5
- Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -248,23 +250,15 @@ STAGE PLANS:
key expressions: _col1 (type: int)
sort order: +
Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col3 (type: string), _col4 (type: string), _col5 (type: int)
+ Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: bigint), _col7 (type: bigint)
TableScan
- alias: supplier_hive
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Filter Operator
- predicate: (s_comment like '%Customer%Complaints%') (type: boolean)
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: s_suppkey (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ value expressions: _col1 (type: boolean)
Reduce Operator Tree:
Join Operator
condition map:
@@ -272,21 +266,21 @@ STAGE PLANS:
keys:
0 _col1 (type: int)
1 _col0 (type: int)
- outputColumnNames: _col1, _col3, _col4, _col5, _col7
- Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7, _col9
+ Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: _col7 is null (type: boolean)
- Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((_col6 = 0) or (_col9 is null and _col1 is not null and (_col7 >= _col6))) (type: boolean)
+ Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col3 (type: string), _col4 (type: string), _col5 (type: int), _col1 (type: int)
outputColumnNames: _col3, _col4, _col5, _col1
- Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(DISTINCT _col1)
keys: _col3 (type: string), _col4 (type: string), _col5 (type: int), _col1 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -302,14 +296,14 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int)
sort order: ++++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int)
- Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Group By Operator
aggregations: count(DISTINCT KEY._col3:0._col0)
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -324,15 +318,15 @@ STAGE PLANS:
Reduce Output Operator
key expressions: _col3 (type: bigint), _col0 (type: string), _col1 (type: string), _col2 (type: int)
sort order: -+++
- Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey0 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -345,36 +339,69 @@ STAGE PLANS:
alias: supplier_hive
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: ((s_comment like '%Customer%Complaints%') and s_suppkey is null) (type: boolean)
+ predicate: (s_comment like '%Customer%Complaints%') (type: boolean)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
+ expressions: s_suppkey (type: int)
+ outputColumnNames: s_suppkey
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Group By Operator
- aggregations: count()
+ aggregations: count(), count(s_suppkey)
mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
Reduce Operator Tree:
Group By Operator
- aggregations: count(VALUE._col0)
+ aggregations: count(VALUE._col0), count(VALUE._col1)
mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (_col0 = 0) (type: boolean)
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-8
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: supplier_hive
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: (s_comment like '%Customer%Complaints%') (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: s_suppkey (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int), true (type: boolean)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: boolean)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: boolean)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Fetch Operator
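The rewritten `constant_prop_3` plan above replaces the old `_col7 is null` anti-join check with the count-based predicate `((_col6 = 0) or (_col9 is null and _col1 is not null and (_col7 >= _col6)))`, where `_col6` is `count()` and `_col7` is `count(s_suppkey)` over the subquery. As an illustrative sketch (function names are hypothetical, not Hive APIs), the two formulations agree with three-valued `NOT IN` logic:

```python
def not_in(value, sub):
    """Three-valued NOT IN: True, False, or None (UNKNOWN)."""
    if value is None:
        return True if not sub else None
    if value in sub:
        return False
    if None in sub:
        return None
    return True

def decorrelated_not_in(value, sub):
    """Count-based rewrite mirroring the plan's filter predicate:
    count(*) = 0  OR  (no join match AND value not null
                       AND count(col) >= count(*))."""
    c = len(sub)                                    # count()
    ck = sum(1 for v in sub if v is not None)       # count(col)
    matched = value is not None and value in sub    # _col9 is null <=> no match
    return c == 0 or (not matched and value is not None and ck >= c)

# A WHERE/HAVING filter keeps a row only when NOT IN is strictly TRUE,
# which is exactly what the rewrite computes.
for value in (None, 1, 2, 100):
    for sub in ([], [2, 100], [None, 2, 100]):
        assert (not_in(value, sub) is True) == decorrelated_not_in(value, sub)
```

Note that `ck >= c` can only hold with equality, i.e. it encodes "the subquery produced no NULLs", which is the condition under which a non-matching value may safely pass.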
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientpositive/constprog_partitioner.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/constprog_partitioner.q.out b/ql/src/test/results/clientpositive/constprog_partitioner.q.out
index d1016ad..d4ccb8c 100644
--- a/ql/src/test/results/clientpositive/constprog_partitioner.q.out
+++ b/ql/src/test/results/clientpositive/constprog_partitioner.q.out
@@ -80,10 +80,114 @@ WHERE li.l_linenumber = 1 AND
li.l_orderkey IN (SELECT l_orderkey FROM lineitem WHERE l_shipmode = 'AIR' AND l_linenumber = li.l_linenumber)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-4 is a root stage
+ Stage-2 depends on stages: Stage-4
+ Stage-3 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-3
Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: li
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: l_linenumber (type: int)
+ outputColumnNames: l_linenumber
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: l_linenumber (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: lineitem
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (l_shipmode = 'AIR') (type: boolean)
+ Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: l_orderkey (type: int), l_linenumber (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col3
+ Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int), _col3 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 27 Data size: 3239 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
Stage: Stage-1
Map Reduce
Map Operator Tree:
@@ -91,7 +195,7 @@ STAGE PLANS:
alias: li
Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((l_linenumber = 1) and l_orderkey is not null) (type: boolean)
+ predicate: (l_linenumber = 1) (type: boolean)
Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int)
@@ -104,29 +208,15 @@ STAGE PLANS:
Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: int)
TableScan
- alias: lineitem
- Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((l_shipmode = 'AIR') and (l_linenumber = 1) and l_orderkey is not null) (type: boolean)
- Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: l_orderkey (type: int), 1 (type: int)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: int), _col1 (type: int)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: int)
- sort order: ++
- Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 27 Data size: 3239 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col0 (type: int), 1 (type: int)
1 _col0 (type: int), _col1 (type: int)
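The `constprog_partitioner` plan above swaps a `Left Semi Join` for an `Inner Join`, which is only safe because the new Stage-3/Stage-4 `Group By` stages de-duplicate the build side first. A minimal sketch of why the two are equivalent (helper names here are illustrative, not Hive operators):

```python
def left_semi_join(left, right):
    # Semi join: keep each left row that has at least one match,
    # emitting it exactly once.
    keys = set(right)
    return [row for row in left if row in keys]

def inner_join_on_distinct(left, right):
    # Inner join against a de-duplicated right side (the extra
    # Group By stage in the plan): at most one match per left row,
    # so no duplicate output rows can be produced.
    distinct_right = set(right)
    return [row for row in left for k in distinct_right if row == k]

left = [1, 1, 2, 3, 4]
right = [2, 2, 3, 3, 5]
```

With these inputs both joins produce `[2, 3]`; without the distinct step, the inner join would emit one output row per duplicate match on the right.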
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
index f993cf0..d3acbcd 100644
--- a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
@@ -3297,17 +3297,17 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 6 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 5 <- Map 4 (SIMPLE_EDGE), Union 6 (CONTAINS)
- Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 6 (CONTAINS)
+ Reducer 7 <- Union 6 (SIMPLE_EDGE)
+ Reducer 9 <- Map 8 (SIMPLE_EDGE), Union 6 (CONTAINS)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ds is not null (type: boolean)
Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ds (type: string)
@@ -3340,7 +3340,7 @@ STAGE PLANS:
value expressions: _col0 (type: string)
Execution mode: llap
LLAP IO: no inputs
- Map 7
+ Map 8
Map Operator Tree:
TableScan
alias: srcpart
@@ -3365,7 +3365,7 @@ STAGE PLANS:
Reduce Operator Tree:
Merge Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
@@ -3402,35 +3402,45 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: _col0 is not null (type: boolean)
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 7
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
+ Dynamic Partitioning Event Operator
+ Target column: ds (string)
+ Target Input: srcpart
+ Partition key expr: ds
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Dynamic Partitioning Event Operator
- Target column: ds (string)
- Target Input: srcpart
- Partition key expr: ds
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Target Vertex: Map 1
- Reducer 8
+ Target Vertex: Map 1
+ Reducer 9
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -3438,34 +3448,16 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: _col0 is not null (type: boolean)
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Dynamic Partitioning Event Operator
- Target column: ds (string)
- Target Input: srcpart
- Partition key expr: ds
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Target Vertex: Map 1
Union 6
Vertex: Union 6
@@ -3505,17 +3497,17 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 6 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 5 <- Map 4 (SIMPLE_EDGE), Union 6 (CONTAINS)
- Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 6 (CONTAINS)
+ Reducer 7 <- Union 6 (SIMPLE_EDGE)
+ Reducer 9 <- Map 8 (SIMPLE_EDGE), Union 6 (CONTAINS)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ds is not null (type: boolean)
Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ds (type: string)
@@ -3548,7 +3540,7 @@ STAGE PLANS:
value expressions: _col0 (type: string)
Execution mode: llap
LLAP IO: no inputs
- Map 7
+ Map 8
Map Operator Tree:
TableScan
alias: srcpart
@@ -3573,7 +3565,7 @@ STAGE PLANS:
Reduce Operator Tree:
Merge Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
@@ -3612,35 +3604,45 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: _col0 is not null (type: boolean)
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 7
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
+ Dynamic Partitioning Event Operator
+ Target column: ds (string)
+ Target Input: srcpart
+ Partition key expr: ds
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Dynamic Partitioning Event Operator
- Target column: ds (string)
- Target Input: srcpart
- Partition key expr: ds
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Target Vertex: Map 1
- Reducer 8
+ Target Vertex: Map 1
+ Reducer 9
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -3648,34 +3650,16 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: _col0 is not null (type: boolean)
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Dynamic Partitioning Event Operator
- Target column: ds (string)
- Target Input: srcpart
- Partition key expr: ds
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Target Vertex: Map 1
Union 6
Vertex: Union 6
@@ -3716,9 +3700,10 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 11 <- Map 10 (SIMPLE_EDGE), Union 9 (CONTAINS)
+ Reducer 10 <- Union 9 (SIMPLE_EDGE)
+ Reducer 12 <- Map 11 (SIMPLE_EDGE), Union 9 (CONTAINS)
Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS)
- Reducer 4 <- Union 3 (SIMPLE_EDGE), Union 9 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Union 3 (SIMPLE_EDGE)
Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS)
Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 9 (CONTAINS)
#### A masked pattern was here ####
@@ -3727,7 +3712,6 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ds is not null (type: boolean)
Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: ds (type: string)
@@ -3741,7 +3725,7 @@ STAGE PLANS:
Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
- Map 10
+ Map 11
Map Operator Tree:
TableScan
alias: srcpart
@@ -3765,7 +3749,6 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ds is not null (type: boolean)
Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: ds (type: string)
@@ -3799,57 +3782,67 @@ STAGE PLANS:
value expressions: _col0 (type: string)
Execution mode: llap
LLAP IO: no inputs
- Reducer 11
+ Reducer 10
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: _col0 is not null (type: boolean)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Dynamic Partitioning Event Operator
+ Target column: ds (string)
+ Target Input: srcpart
+ Partition key expr: ds
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Dynamic Partitioning Event Operator
- Target column: ds (string)
- Target Input: srcpart
- Partition key expr: ds
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Target Vertex: Map 1
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
+ Target Vertex: Map 1
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Dynamic Partitioning Event Operator
+ Target column: ds (string)
+ Target Input: srcpart
+ Partition key expr: ds
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Dynamic Partitioning Event Operator
- Target column: ds (string)
- Target Input: srcpart
- Partition key expr: ds
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Target Vertex: Map 5
+ Target Vertex: Map 5
+ Reducer 12
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -3868,7 +3861,7 @@ STAGE PLANS:
Reduce Operator Tree:
Merge Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
@@ -3902,49 +3895,16 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: _col0 is not null (type: boolean)
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Dynamic Partitioning Event Operator
- Target column: ds (string)
- Target Input: srcpart
- Partition key expr: ds
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Target Vertex: Map 1
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Dynamic Partitioning Event Operator
- Target column: ds (string)
- Target Input: srcpart
- Partition key expr: ds
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Target Vertex: Map 5
Union 3
Vertex: Union 3
Union 9
@@ -5688,45 +5648,29 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Union 5 (BROADCAST_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
- Reducer 4 <- Map 3 (SIMPLE_EDGE), Union 5 (CONTAINS)
- Reducer 7 <- Map 6 (SIMPLE_EDGE), Union 5 (CONTAINS)
+ Reducer 3 <- Map 2 (SIMPLE_EDGE), Union 4 (CONTAINS)
+ Reducer 5 <- Map 1 (BROADCAST_EDGE), Union 4 (SIMPLE_EDGE)
+ Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
+ Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 4 (CONTAINS)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ds is not null (type: boolean)
Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
- condition map:
- Left Semi Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0
- input vertices:
- 1 Union 5
- Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
- Map 3
+ Map 2
Map Operator Tree:
TableScan
alias: srcpart
@@ -5746,7 +5690,7 @@ STAGE PLANS:
value expressions: _col0 (type: string)
Execution mode: llap
LLAP IO: no inputs
- Map 6
+ Map 7
Map Operator Tree:
TableScan
alias: srcpart
@@ -5766,32 +5710,42 @@ STAGE PLANS:
value expressions: _col0 (type: string)
Execution mode: llap
LLAP IO: no inputs
- Reducer 2
+ Reducer 3
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- keys: KEY._col0 (type: string)
+ aggregations: max(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 4
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 5
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: max(VALUE._col0)
+ keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: _col0 is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0
+ input vertices:
+ 0 Map 1
+ Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: _col0 (type: string)
mode: hash
@@ -5802,22 +5756,22 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Dynamic Partitioning Event Operator
- Target column: ds (string)
- Target Input: srcpart
- Partition key expr: ds
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Target Vertex: Map 1
- Reducer 7
+ Reducer 6
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 8
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -5825,36 +5779,18 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: _col0 is not null (type: boolean)
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Dynamic Partitioning Event Operator
- Target column: ds (string)
- Target Input: srcpart
- Partition key expr: ds
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Target Vertex: Map 1
- Union 5
- Vertex: Union 5
+ Union 4
+ Vertex: Union 4
Stage: Stage-0
Fetch Operator
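(Editorial aside, not part of the commit: the subquery_notin.q.out diff below replaces the old `_col3 is null` anti-join filter with a CASE predicate such as `not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END`. This is Calcite's null-aware NOT IN rewrite, where `_col2` is `count(*)` over the subquery, `_col3` is `count(key)`, and `_col5` flags a join match. A minimal Python sketch of those three-valued semantics, with the function name and structure being illustrative only:)

```python
def not_in(outer_key, subquery_keys):
    """SQL-style `outer_key NOT IN (subquery)` under three-valued logic.

    Returns True, False, or None (SQL NULL). A WHERE filter keeps a row
    only when the result is True, so None behaves like False there --
    which is why the Hive plan can fold the "unknown" branches into
    values that NOT maps to false.
    """
    c = len(subquery_keys)                                  # count(*)  -> _col2
    c_nonnull = sum(k is not None for k in subquery_keys)   # count(key) -> _col3
    if c == 0:
        return True       # empty subquery: NOT IN is vacuously true
    if outer_key is not None and outer_key in subquery_keys:
        return False      # semi-join hit (the `_col5 is not null` branch)
    if outer_key is None:
        return None       # NULL key vs. non-empty list -> unknown
    if c_nonnull < c:
        return None       # subquery produced a NULL -> no match is provable
    return True

# No match, no NULLs anywhere: the row survives the filter.
assert not_in(3, [1, 2]) is True
# A NULL in the subquery makes the non-match unknowable.
assert not_in(3, [1, None]) is None
```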
[12/21] hive git commit: HIVE-15192 : Use Calcite to de-correlate and plan subqueries (Vineet Garg via Ashutosh Chauhan)
Posted by ha...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
index 3da1acb..47a9d6b 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
@@ -1,4 +1,4 @@
-Warning: Shuffle Join MERGEJOIN[26][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: -- non agg, non corr
explain
select *
@@ -27,8 +27,9 @@ STAGE PLANS:
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
- Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
Reducer 5 <- Map 4 (SIMPLE_EDGE)
+ Reducer 7 <- Map 6 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -50,21 +51,19 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: s1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint)
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key > '2') (type: boolean)
+ Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(), count(key)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
Map 6
@@ -79,11 +78,16 @@ STAGE PLANS:
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: string), true (type: boolean)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 69 Data size: 6279 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: boolean)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean)
+ Statistics: Num rows: 69 Data size: 6279 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Reducer 2
@@ -95,14 +99,14 @@ STAGE PLANS:
keys:
0
1
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string), _col2 (type: bigint), _col3 (type: bigint)
Reducer 3
Execution mode: llap
Reduce Operator Tree:
@@ -112,18 +116,18 @@ STAGE PLANS:
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 500 Data size: 132500 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col3, _col5
+ Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: _col3 is null (type: boolean)
- Statistics: Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: (not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END) (type: boolean)
+ Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -132,18 +136,28 @@ STAGE PLANS:
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: count(VALUE._col0)
+ aggregations: count(VALUE._col0), count(VALUE._col1)
mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (_col0 = 0) (type: boolean)
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
+ Reducer 7
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: boolean)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 69 Data size: 6279 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 69 Data size: 6279 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: boolean)
Stage: Stage-0
Fetch Operator
@@ -151,7 +165,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[31][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: select *
from src
where src.key not in ( select key from src s1 where s1.key > '2')
@@ -285,7 +299,6 @@ POSTHOOK: Input: default@src
199 val_199
199 val_199
2 val_2
-Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: -- non agg, corr
explain
select p_mfgr, b.p_name, p_size
@@ -315,11 +328,18 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+ Reducer 11 <- Map 10 (SIMPLE_EDGE)
+ Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE)
+ Reducer 13 <- Reducer 12 (SIMPLE_EDGE)
+ Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE)
+ Reducer 16 <- Map 15 (SIMPLE_EDGE)
+ Reducer 18 <- Map 17 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 14 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
Reducer 5 <- Map 4 (SIMPLE_EDGE)
- Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
- Reducer 8 <- Map 7 (SIMPLE_EDGE)
+ Reducer 6 <- Reducer 5 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
+ Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
+ Reducer 9 <- Map 8 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -332,12 +352,14 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- sort order:
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ value expressions: _col0 (type: string), _col2 (type: int)
Execution mode: llap
LLAP IO: no inputs
- Map 4
+ Map 10
Map Operator Tree:
TableScan
alias: part
@@ -351,7 +373,41 @@ STAGE PLANS:
value expressions: p_name (type: string)
Execution mode: llap
LLAP IO: no inputs
- Map 7
+ Map 15
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 26 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: p_mfgr (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 17
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: p_name (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 4
Map Operator Tree:
TableScan
alias: part
@@ -365,49 +421,24 @@ STAGE PLANS:
value expressions: p_name (type: string)
Execution mode: llap
LLAP IO: no inputs
- Reducer 2
- Execution mode: llap
- Reduce Operator Tree:
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0
- 1
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col2 (type: int)
- Reducer 3
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 26 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: p_mfgr (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
- Reduce Operator Tree:
- Merge Join Operator
- condition map:
- Left Outer Join0 to 1
- keys:
- 0 _col0 (type: string), _col1 (type: string)
- 1 _col0 (type: string), _col1 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col4
- Statistics: Num rows: 26 Data size: 8944 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: _col4 is null (type: boolean)
- Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col1 (type: string), _col0 (type: string), _col2 (type: int)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 223 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 223 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 5
+ LLAP IO: no inputs
+ Reducer 11
Execution mode: llap
Reduce Operator Tree:
Select Operator
@@ -436,36 +467,143 @@ STAGE PLANS:
isPivotResult: true
Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: ((rank_window_0 <= 2) and (_col1 is null or _col2 is null)) (type: boolean)
- Statistics: Num rows: 1 Data size: 491 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: (rank_window_0 <= 2) (type: boolean)
+ Statistics: Num rows: 8 Data size: 3928 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- Statistics: Num rows: 1 Data size: 491 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint)
- Reducer 6
+ expressions: _col2 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8 Data size: 1752 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 8 Data size: 1752 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string)
+ Reducer 12
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 8 Data size: 1752 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col1 (type: string), _col2 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 4 Data size: 876 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 4 Data size: 876 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 13
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 4 Data size: 876 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), true (type: boolean)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 4 Data size: 892 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 4 Data size: 892 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string), _col2 (type: boolean)
+ Reducer 14
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 2 Data size: 446 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col3 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col3 (type: string), _col1 (type: string)
+ Statistics: Num rows: 2 Data size: 446 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: boolean)
+ Reducer 16
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 18
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5
+ Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: int), _col4 (type: bigint), _col5 (type: bigint)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: string), _col1 (type: string)
+ 1 _col3 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col8
+ Statistics: Num rows: 26 Data size: 6318 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (_col0 = 0) (type: boolean)
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: CASE WHEN ((_col4 = 0)) THEN (true) WHEN (_col4 is null) THEN (true) WHEN (_col8 is not null) THEN (false) WHEN (_col0 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (false) ELSE (true) END (type: boolean)
+ Statistics: Num rows: 13 Data size: 3159 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reducer 8
+ expressions: _col1 (type: string), _col0 (type: string), _col2 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
Execution mode: llap
Reduce Operator Tree:
Select Operator
@@ -497,14 +635,70 @@ STAGE PLANS:
predicate: (rank_window_0 <= 2) (type: boolean)
Statistics: Num rows: 8 Data size: 3928 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: _col1 (type: string), _col2 (type: string)
+ expressions: _col2 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 8 Data size: 1752 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 8 Data size: 1752 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string)
+ Reducer 6
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 8 Data size: 1752 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col2 (type: string), _col1 (type: string)
+ outputColumnNames: _col2, _col1
+ Statistics: Num rows: 8 Data size: 1752 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(), count(_col1)
+ keys: _col2 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
+ Reducer 7
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), count(VALUE._col1)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
+ Reducer 9
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE
Stage: Stage-0
Fetch Operator
@@ -512,7 +706,6 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join MERGEJOIN[38][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: select p_mfgr, b.p_name, p_size
from part b
where b.p_name not in
@@ -551,7 +744,7 @@ Manufacturer#4 almond azure aquamarine papaya violet 12
Manufacturer#5 almond antique blue firebrick mint 31
Manufacturer#5 almond aquamarine dodger light gainsboro 46
Manufacturer#5 almond azure blanched chiffon midnight 23
-Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[49][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: -- agg, non corr
explain
select p_name, p_size
@@ -581,8 +774,9 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
+ Reducer 10 <- Reducer 9 (SIMPLE_EDGE)
Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
Reducer 5 <- Map 4 (SIMPLE_EDGE)
Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
Reducer 8 <- Map 7 (SIMPLE_EDGE)
@@ -630,6 +824,20 @@ STAGE PLANS:
TopN Hash Memory Usage: 0.1
Execution mode: llap
LLAP IO: no inputs
+ Reducer 10
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: double), KEY._col1 (type: boolean)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: double)
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: boolean)
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -639,14 +847,14 @@ STAGE PLANS:
keys:
0
1
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 26 Data size: 3666 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: UDFToDouble(_col1) (type: double)
sort order: +
Map-reduce partition columns: UDFToDouble(_col1) (type: double)
- Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: string), _col1 (type: int)
+ Statistics: Num rows: 26 Data size: 3666 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint)
Reducer 3
Execution mode: llap
Reduce Operator Tree:
@@ -656,18 +864,18 @@ STAGE PLANS:
keys:
0 UDFToDouble(_col1) (type: double)
1 _col0 (type: double)
- outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 26 Data size: 3458 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col3, _col5
+ Statistics: Num rows: 26 Data size: 3770 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: _col3 is null (type: boolean)
- Statistics: Num rows: 1 Data size: 133 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: (not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) WHEN (_col1 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END) (type: boolean)
+ Statistics: Num rows: 13 Data size: 1885 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -724,24 +932,15 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: _col0 is null (type: boolean)
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: count()
- mode: complete
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (_col0 = 0) (type: boolean)
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(), count(_col0)
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
Reducer 8
Execution mode: llap
Reduce Operator Tree:
@@ -794,11 +993,16 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: double)
- sort order: +
- Map-reduce partition columns: _col0 (type: double)
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: double), true (type: boolean)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: double), _col1 (type: boolean)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: double), _col1 (type: boolean)
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Stage: Stage-0
Fetch Operator
@@ -806,7 +1010,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join MERGEJOIN[50][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[51][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: select p_name, p_size
from
part where part.p_size not in
@@ -853,7 +1057,6 @@ almond aquamarine sandy cyan gainsboro 18
almond aquamarine yellow dodger mint 7
almond azure aquamarine papaya violet 12
almond azure blanched chiffon midnight 23
-Warning: Shuffle Join MERGEJOIN[47][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: -- agg, corr
explain
select p_mfgr, p_name, p_size
@@ -881,11 +1084,17 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 10 <- Reducer 9 (SIMPLE_EDGE)
+ Reducer 11 <- Map 10 (SIMPLE_EDGE)
+ Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE)
+ Reducer 13 <- Reducer 12 (SIMPLE_EDGE)
+ Reducer 14 <- Reducer 13 (SIMPLE_EDGE)
+ Reducer 15 <- Reducer 14 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE)
+ Reducer 17 <- Map 16 (SIMPLE_EDGE)
+ Reducer 19 <- Map 18 (SIMPLE_EDGE)
Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
Reducer 5 <- Map 4 (SIMPLE_EDGE)
- Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
+ Reducer 6 <- Reducer 5 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
Reducer 9 <- Map 8 (SIMPLE_EDGE)
#### A masked pattern was here ####
@@ -900,12 +1109,14 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- sort order:
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ value expressions: _col0 (type: string), _col2 (type: int)
Execution mode: llap
LLAP IO: no inputs
- Map 4
+ Map 10
Map Operator Tree:
TableScan
alias: part
@@ -918,7 +1129,41 @@ STAGE PLANS:
TopN Hash Memory Usage: 0.1
Execution mode: llap
LLAP IO: no inputs
- Map 8
+ Map 16
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 26 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: p_mfgr (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 18
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: p_size (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 4
Map Operator Tree:
TableScan
alias: part
@@ -931,7 +1176,92 @@ STAGE PLANS:
TopN Hash Memory Usage: 0.1
Execution mode: llap
LLAP IO: no inputs
- Reducer 10
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 26 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: p_mfgr (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 11
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int)
+ outputColumnNames: _col2, _col5
+ Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col2: string, _col5: int
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col5 ASC NULLS FIRST
+ partition by: _col2
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col5
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (rank_window_0 <= 2) (type: boolean)
+ Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col2 (type: string), _col5 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: int)
+ Reducer 12
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col2 (type: string), _col1 (type: int)
+ outputColumnNames: _col2, _col1
+ Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(_col1)
+ keys: _col2 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: int)
+ Reducer 13
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -940,32 +1270,94 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col1 (type: int), _col0 (type: string)
+ Group By Operator
+ keys: _col1 (type: int), _col0 (type: string)
+ mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col1 (type: string), _col0 (type: int)
+ key expressions: _col0 (type: int), _col1 (type: string)
sort order: ++
- Map-reduce partition columns: _col1 (type: string), _col0 (type: int)
- Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
- Reducer 2
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
+ Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 14
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), true (type: boolean)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string), _col2 (type: boolean)
+ Reducer 15
Execution mode: llap
Reduce Operator Tree:
Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0
- 1
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: string), _col3 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: string), _col3 (type: int)
+ Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: boolean)
+ Reducer 17
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 19
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5
+ Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col1 (type: string), _col2 (type: int)
sort order: ++
Map-reduce partition columns: _col1 (type: string), _col2 (type: int)
- Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: string)
+ Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: string), _col4 (type: bigint), _col5 (type: bigint)
Reducer 3
Execution mode: llap
Reduce Operator Tree:
@@ -974,19 +1366,19 @@ STAGE PLANS:
Left Outer Join0 to 1
keys:
0 _col1 (type: string), _col2 (type: int)
- 1 _col1 (type: string), _col0 (type: int)
- outputColumnNames: _col0, _col1, _col2, _col4
- Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE
+ 1 _col1 (type: string), _col3 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col8
+ Statistics: Num rows: 26 Data size: 6318 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: _col4 is null (type: boolean)
- Statistics: Num rows: 1 Data size: 227 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: CASE WHEN ((_col4 = 0)) THEN (true) WHEN (_col4 is null) THEN (true) WHEN (_col8 is not null) THEN (false) WHEN (_col2 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (false) ELSE (true) END (type: boolean)
+ Statistics: Num rows: 13 Data size: 3159 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col1 (type: string), _col0 (type: string), _col2 (type: int)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 223 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 223 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1025,113 +1417,81 @@ STAGE PLANS:
Select Operator
expressions: _col2 (type: string), _col5 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: min(_col1)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: int)
+ Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: int)
Reducer 6
Execution mode: llap
Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col2 (type: string), _col1 (type: int)
+ outputColumnNames: _col2, _col1
+ Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(_col1)
+ keys: _col2 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: int)
+ Reducer 7
+ Execution mode: llap
+ Reduce Operator Tree:
Group By Operator
aggregations: min(VALUE._col0)
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (_col1 is null or _col0 is null) (type: boolean)
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint)
- Reducer 7
+ Group By Operator
+ aggregations: count(), count(_col1)
+ keys: _col0 (type: string)
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
+ Reducer 9
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (_col0 = 0) (type: boolean)
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reducer 9
- Execution mode: llap
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int)
- outputColumnNames: _col2, _col5
- Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE
- PTF Operator
- Function definitions:
- Input definition
- input alias: ptf_0
- output shape: _col2: string, _col5: int
- type: WINDOWING
- Windowing table definition
- input alias: ptf_1
- name: windowingtablefunction
- order by: _col5 ASC NULLS FIRST
- partition by: _col2
- raw input shape:
- window functions:
- window function definition
- alias: rank_window_0
- arguments: _col5
- name: rank
- window function: GenericUDAFRankEvaluator
- window frame: PRECEDING(MAX)~FOLLOWING(MAX)
- isPivotResult: true
- Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (rank_window_0 <= 2) (type: boolean)
- Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col2 (type: string), _col5 (type: int)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: min(_col1)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: int)
-
- Stage: Stage-0
- Fetch Operator
+ Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE
+
+ Stage: Stage-0
+ Fetch Operator
limit: -1
Processor Tree:
ListSink
-Warning: Shuffle Join MERGEJOIN[49][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: select p_mfgr, p_name, p_size
from part b where b.p_size not in
(select min(p_size)
@@ -1172,21 +1532,21 @@ Manufacturer#5 almond antique medium spring khaki 6
Manufacturer#5 almond azure blanched chiffon midnight 23
Manufacturer#5 almond antique blue firebrick mint 31
Manufacturer#5 almond aquamarine dodger light gainsboro 46
-Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[35][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: -- non agg, non corr, Group By in Parent Query
-select li.l_partkey, count(*)
-from lineitem li
-where li.l_linenumber = 1 and
- li.l_orderkey not in (select l_orderkey from lineitem where l_shipmode = 'AIR')
+select li.l_partkey, count(*)
+from lineitem li
+where li.l_linenumber = 1 and
+ li.l_orderkey not in (select l_orderkey from lineitem where l_shipmode = 'AIR')
group by li.l_partkey
PREHOOK: type: QUERY
PREHOOK: Input: default@lineitem
#### A masked pattern was here ####
POSTHOOK: query: -- non agg, non corr, Group By in Parent Query
-select li.l_partkey, count(*)
-from lineitem li
-where li.l_linenumber = 1 and
- li.l_orderkey not in (select l_orderkey from lineitem where l_shipmode = 'AIR')
+select li.l_partkey, count(*)
+from lineitem li
+where li.l_linenumber = 1 and
+ li.l_orderkey not in (select l_orderkey from lineitem where l_shipmode = 'AIR')
group by li.l_partkey
POSTHOOK: type: QUERY
POSTHOOK: Input: default@lineitem
@@ -1207,7 +1567,7 @@ POSTHOOK: Input: default@lineitem
139636 1
175839 1
182052 1
-Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[31][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: -- alternate not in syntax
select *
from src
@@ -1371,7 +1731,7 @@ POSTHOOK: Input: default@src
POSTHOOK: Input: default@t1_v
POSTHOOK: Output: database:default
POSTHOOK: Output: default@T2_v
-Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[33][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: explain
select *
from T1_v where T1_v.key not in (select T2_v.key from T2_v)
@@ -1390,8 +1750,9 @@ STAGE PLANS:
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
- Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
Reducer 5 <- Map 4 (SIMPLE_EDGE)
+ Reducer 7 <- Map 6 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -1422,19 +1783,21 @@ STAGE PLANS:
insideView TRUE
Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: ((key < '11') and CASE WHEN ((key > '104')) THEN (true) ELSE (key is null) END) (type: boolean)
- Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: (key < '11') (type: boolean)
+ Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE
+ expressions: CASE WHEN ((key > '104')) THEN (null) ELSE (key) END (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: count()
+ aggregations: count(), count(_col0)
mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint)
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
Map 6
@@ -1445,17 +1808,22 @@ STAGE PLANS:
insideView TRUE
Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: ((key < '11') and CASE WHEN ((key > '104')) THEN (null) ELSE ((key < '11')) END) (type: boolean)
- Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: (key < '11') (type: boolean)
+ Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: CASE WHEN ((key > '104')) THEN (null) ELSE (key) END (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 83 Data size: 15272 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 83 Data size: 15272 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 166 Data size: 31208 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: string), true (type: boolean)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 69 Data size: 12972 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: boolean)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean)
+ Statistics: Num rows: 69 Data size: 12972 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Reducer 2
@@ -1467,13 +1835,14 @@ STAGE PLANS:
keys:
0
1
- outputColumnNames: _col0
- Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 166 Data size: 17098 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 166 Data size: 17098 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
Reducer 3
Execution mode: llap
Reduce Operator Tree:
@@ -1483,18 +1852,18 @@ STAGE PLANS:
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col0, _col2
- Statistics: Num rows: 199 Data size: 53929 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col4
+ Statistics: Num rows: 166 Data size: 17762 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: _col2 is null (type: boolean)
- Statistics: Num rows: 1 Data size: 271 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: (not CASE WHEN ((_col1 = 0)) THEN (false) WHEN (_col4 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col2 < _col1)) THEN (true) ELSE (false) END) (type: boolean)
+ Statistics: Num rows: 83 Data size: 8881 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1503,18 +1872,28 @@ STAGE PLANS:
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: count(VALUE._col0)
+ aggregations: count(VALUE._col0), count(VALUE._col1)
mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (_col0 = 0) (type: boolean)
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
+ Reducer 7
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: boolean)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 69 Data size: 12972 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 69 Data size: 12972 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: boolean)
Stage: Stage-0
Fetch Operator
@@ -1522,7 +1901,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[33][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: select *
from T1_v where T1_v.key not in (select T2_v.key from T2_v)
PREHOOK: type: QUERY
@@ -1537,3 +1916,7252 @@ POSTHOOK: Input: default@src
POSTHOOK: Input: default@t1_v
POSTHOOK: Input: default@t2_v
#### A masked pattern was here ####
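The rewritten NOT IN filters in the plans above (e.g. `not CASE WHEN ((_col1 = 0)) THEN (false) WHEN (_col4 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col2 < _col1)) THEN (true) ELSE (false) END`) encode SQL's three-valued NOT IN semantics using two counts over the subquery: `count(*)` and `count(col)`. A minimal sketch of that logic follows; the class and method names are illustrative only and are not part of Hive:

```java
import java.util.*;

// Three-valued NOT IN, the semantics the count-based CASE rewrite encodes.
// Returns Boolean.TRUE / Boolean.FALSE / null (SQL UNKNOWN).
final class NotInSketch {
    static Boolean notIn(Integer x, List<Integer> subquery) {
        long c = subquery.size();                                      // count(*)
        long ck = subquery.stream().filter(Objects::nonNull).count();  // count(col)
        boolean matched = x != null && subquery.contains(x);           // the semi-join hit
        if (c == 0) return Boolean.TRUE;   // empty subquery: NOT IN is true
        if (matched) return Boolean.FALSE; // definite match: false
        if (x == null) return null;        // null LHS vs non-empty list: unknown
        if (ck < c) return null;           // subquery contains nulls: unknown
        return Boolean.TRUE;               // no match, no nulls: true
    }
}
```

Note that the plan's CASE yields `false` where SQL yields UNKNOWN; since a WHERE filter rejects both `false` and UNKNOWN rows, the two formulations filter identically.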
+PREHOOK: query: --where has multiple conjuction
+explain select * from part where p_brand <> 'Brand#14' AND p_size NOT IN (select min(p_size) from part p where p.p_type = part.p_type group by p_type) AND p_size <> 340
+PREHOOK: type: QUERY
+POSTHOOK: query: --where has multiple conjuction
+explain select * from part where p_brand <> 'Brand#14' AND p_size NOT IN (select min(p_size) from part p where p.p_type = part.p_type group by p_type) AND p_size <> 340
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 10 <- Map 9 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE)
+ Reducer 11 <- Reducer 10 (SIMPLE_EDGE)
+ Reducer 12 <- Reducer 11 (SIMPLE_EDGE)
+ Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE)
+ Reducer 15 <- Map 14 (SIMPLE_EDGE)
+ Reducer 17 <- Map 16 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+ Reducer 5 <- Map 4 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+ Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
+ Reducer 8 <- Map 7 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: ((p_brand <> 'Brand#14') and (p_size <> 340)) (type: boolean)
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col4 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col4 (type: string)
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 14
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: ((p_brand <> 'Brand#14') and (p_size <> 340)) (type: boolean)
+ Statistics: Num rows: 26 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: p_type (type: string)
+ outputColumnNames: p_type
+ Statistics: Num rows: 26 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: p_type (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 16
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: ((p_brand <> 'Brand#14') and (p_size <> 340)) (type: boolean)
+ Statistics: Num rows: 26 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: p_size (type: int)
+ outputColumnNames: p_size
+ Statistics: Num rows: 26 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: p_size (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: p
+ Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: p_type (type: string), p_size (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: int)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: ((p_brand <> 'Brand#14') and (p_size <> 340)) (type: boolean)
+ Statistics: Num rows: 26 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: p_type (type: string)
+ outputColumnNames: p_type
+ Statistics: Num rows: 26 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: p_type (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: p
+ Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: p_type (type: string), p_size (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: int)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 10
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 12 Data size: 2544 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(_col1)
+ keys: _col2 (type: string), _col0 (type: string)
+ mode: hash
+ outputColumnNames: _c
<TRUNCATED>
[21/21] hive git commit: HIVE-15192 : Use Calcite to de-correlate and
plan subqueries (Vineet Garg via Ashutosh Chauhan)
Posted by ha...@apache.org.
HIVE-15192 : Use Calcite to de-correlate and plan subqueries (Vineet Garg via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/382dc208
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/382dc208
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/382dc208
Branch: refs/heads/master
Commit: 382dc208422496c99e9ca39489128d0e9e23952a
Parents: 2ae78f0
Author: Vineet Garg <vg...@hortonworks.com>
Authored: Sun Nov 13 18:33:00 2016 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Fri Dec 16 10:27:15 2016 -0800
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 10 +-
.../hadoop/hive/ql/lib/ExpressionWalker.java | 97 +
.../ql/optimizer/calcite/HiveRelShuttle.java | 40 +
.../optimizer/calcite/HiveRelShuttleImpl.java | 145 +
.../calcite/HiveSubQRemoveRelBuilder.java | 1680 ++++
.../calcite/reloperators/HiveFilter.java | 72 +-
.../calcite/reloperators/HiveJoin.java | 10 +
.../calcite/reloperators/HiveProject.java | 10 +
.../calcite/reloperators/HiveSortLimit.java | 10 +
.../calcite/rules/HiveRelDecorrelator.java | 3007 +++++++
.../calcite/rules/HiveSubQueryRemoveRule.java | 329 +
.../calcite/translator/HiveOpConverter.java | 5 +
.../calcite/translator/RexNodeConverter.java | 71 +-
.../hadoop/hive/ql/parse/CalcitePlanner.java | 261 +-
.../apache/hadoop/hive/ql/parse/QBSubQuery.java | 10 -
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 12 +
.../hadoop/hive/ql/parse/TypeCheckCtx.java | 49 +-
.../hive/ql/parse/TypeCheckProcFactory.java | 74 +-
.../hive/ql/plan/ExprNodeSubQueryDesc.java | 104 +
.../queries/clientnegative/subquery_corr_from.q | 3 +
.../clientnegative/subquery_corr_grandparent.q | 5 +
.../clientnegative/subquery_corr_select.q | 2 +
.../clientnegative/subquery_restrictions.q | 92 +
.../queries/clientpositive/cbo_rp_auto_join1.q | 1 +
ql/src/test/queries/clientpositive/join31.q | 1 +
.../test/queries/clientpositive/multiMapJoin2.q | 1 +
.../test/queries/clientpositive/subquery_in.q | 102 +
.../queries/clientpositive/subquery_notin.q | 133 +-
.../clientpositive/subquery_notin_having.q | 16 +
.../clientnegative/subquery_corr_from.q.out | 1 +
.../subquery_corr_grandparent.q.out | 1 +
.../clientnegative/subquery_corr_select.q.out | 1 +
.../clientnegative/subquery_in_groupby.q.out | 2 +-
.../clientnegative/subquery_in_select.q.out | 2 +-
.../subquery_nested_subquery.q.out | 1 -
.../clientnegative/subquery_restrictions.q.out | 1 +
.../clientpositive/constant_prop_3.q.out | 129 +-
.../clientpositive/constprog_partitioner.q.out | 134 +-
.../llap/dynamic_partition_pruning.q.out | 484 +-
.../clientpositive/llap/explainuser_1.q.out | 1122 ++-
.../results/clientpositive/llap/lineage3.q.out | 10 +-
.../clientpositive/llap/subquery_exists.q.out | 219 +-
.../clientpositive/llap/subquery_in.q.out | 4309 ++++++++-
.../llap/subquery_nested_subquery.q.out | 38 +
.../clientpositive/llap/subquery_notin.q.out | 8278 +++++++++++++++++-
.../llap/subquery_shared_alias.q.out | 23 +
.../clientpositive/llap/subquery_views.q.out | 624 +-
.../llap/vector_groupby_mapjoin.q.out | 72 +-
.../llap/vector_mapjoin_reduce.q.out | 329 +-
.../vectorized_dynamic_partition_pruning.q.out | 484 +-
.../test/results/clientpositive/masking_3.q.out | 1042 ++-
.../test/results/clientpositive/masking_4.q.out | 143 +-
.../results/clientpositive/perf/query45.q.out | 120 +-
.../results/clientpositive/perf/query70.q.out | 142 +-
.../test/results/clientpositive/semijoin4.q.out | 93 +-
.../test/results/clientpositive/semijoin5.q.out | 245 +-
.../spark/constprog_partitioner.q.out | 96 +-
.../clientpositive/spark/subquery_exists.q.out | 211 +-
.../clientpositive/spark/subquery_in.q.out | 4087 ++++++++-
.../spark/vector_mapjoin_reduce.q.out | 387 +-
.../subq_where_serialization.q.out | 118 +-
.../clientpositive/subquery_exists.q.out | 265 +-
.../clientpositive/subquery_exists_having.q.out | 255 +-
.../clientpositive/subquery_in_having.q.out | 1030 ++-
.../clientpositive/subquery_notexists.q.out | 256 +-
.../subquery_notexists_having.q.out | 280 +-
.../clientpositive/subquery_notin_having.q.out | 1428 ++-
.../subquery_unqualcolumnrefs.q.out | 1379 ++-
.../clientpositive/vector_groupby_mapjoin.q.out | 245 +-
.../clientpositive/vector_mapjoin_reduce.q.out | 456 +-
70 files changed, 30598 insertions(+), 4296 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index aa3d72d..2902d92 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -24,6 +24,9 @@ minimr.query.files=infer_bucket_sort_map_operators.q,\
# Tests that are not enabled for CLI Driver
disabled.query.files=ql_rewrite_gbtoidx.q,\
ql_rewrite_gbtoidx_cbo_1.q,\
+ cbo_rp_subq_in.q,\
+ cbo_rp_subq_not_in.q,\
+ cbo_rp_subq_exists.q,\
ql_rewrite_gbtoidx_cbo_2.q,\
rcfile_merge1.q,\
smb_mapjoin_8.q,\
@@ -228,6 +231,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
stats_only_null.q,\
subquery_exists.q,\
subquery_in.q,\
+ subquery_restrictions,\
temp_table.q,\
tez_bmj_schema_evolution.q,\
tez_dml.q,\
@@ -472,7 +476,6 @@ minillaplocal.query.files=acid_globallimit.q,\
cbo_rp_join.q,\
cbo_rp_lineage2.q,\
cbo_rp_semijoin.q,\
- cbo_rp_subq_not_in.q,\
cbo_rp_unionDistinct_2.q,\
cbo_rp_windowing_2.q,\
cbo_subq_not_in.q,\
@@ -566,6 +569,8 @@ minillaplocal.query.files=acid_globallimit.q,\
special_character_in_tabnames_1.q,\
stats_based_fetch_decision.q,\
subquery_notin.q,\
+ subquery_nested_subquery.q, \
+ subquery_shared_alias.q, \
table_access_keys_stats.q,\
tez_bmj_schema_evolution.q,\
tez_dml.q,\
@@ -648,11 +653,11 @@ minillaplocal.query.files=acid_globallimit.q,\
offset_limit_ppd_optimizer.q,\
cluster.q,\
subquery_in.q,\
+ subquery_restrictions,\
stats11.q,\
orc_create.q,\
orc_split_elimination.q,\
order_null.q,\
- cbo_rp_subq_in.q,\
skewjoinopt15.q,\
authorization_2.q,\
cbo_subq_in.q,\
@@ -1327,6 +1332,7 @@ spark.query.files=add_part_multiple.q, \
statsfs.q, \
subquery_exists.q, \
subquery_in.q, \
+ subquery_restrictions, \
subquery_multiinsert.q, \
table_access_keys_stats.q, \
temp_table.q, \
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/java/org/apache/hadoop/hive/ql/lib/ExpressionWalker.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lib/ExpressionWalker.java b/ql/src/java/org/apache/hadoop/hive/ql/lib/ExpressionWalker.java
new file mode 100644
index 0000000..37ad4cb
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/lib/ExpressionWalker.java
@@ -0,0 +1,97 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.lib;
+
+import org.apache.hadoop.hive.ql.parse.ASTNode;
+import org.apache.hadoop.hive.ql.parse.HiveParser;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+
+public class ExpressionWalker extends DefaultGraphWalker {
+
+ /**
+ * Constructor.
+ *
+ * @param disp
+ * dispatcher to call for each op encountered
+ */
+ public ExpressionWalker(Dispatcher disp) {
+ super(disp);
+ }
+
+
+ /**
+ * We should bypass the subquery since we have already processed it and created its
+ * logical plan (in genLogicalPlan) by this point.
+ * SubQueryExprProcessor will use the generated plan and create the appropriate ExprNodeSubQueryDesc.
+ */
+ private boolean shouldByPass(Node childNode, Node parentNode) {
+ if(parentNode instanceof ASTNode
+ && ((ASTNode)parentNode).getType() == HiveParser.TOK_SUBQUERY_EXPR) {
+ ASTNode parentOp = (ASTNode)parentNode;
+ //subquery either in WHERE <LHS> IN <SUBQUERY> form OR WHERE EXISTS <SUBQUERY> form
+ //in first case LHS should not be bypassed
+ assert(parentOp.getChildCount() == 2 || parentOp.getChildCount()==3);
+ if(parentOp.getChildCount() == 3 && (ASTNode)childNode == parentOp.getChild(2)) {
+ return false;
+ }
+ return true;
+ }
+ return false;
+ }
+ /**
+ * walk the current operator and its descendants.
+ *
+ * @param nd
+ * current operator in the graph
+ * @throws SemanticException
+ */
+ protected void walk(Node nd) throws SemanticException {
+ // Push the node in the stack
+ opStack.push(nd);
+
+ // While there are still nodes to dispatch...
+ while (!opStack.empty()) {
+ Node node = opStack.peek();
+
+ if (node.getChildren() == null ||
+ getDispatchedList().containsAll(node.getChildren())) {
+ // Dispatch current node
+ if (!getDispatchedList().contains(node)) {
+ dispatch(node, opStack);
+ opQueue.add(node);
+ }
+ opStack.pop();
+ continue;
+ }
+
+ // Add a single child and restart the loop
+ for (Node childNode : node.getChildren()) {
+ if (!getDispatchedList().contains(childNode)) {
+ if(shouldByPass(childNode, node)) {
+ retMap.put(childNode, null);
+ } else {
+ opStack.push(childNode);
+ }
+ break;
+ }
+ }
+ } // end while
+ }
+}
+
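The `walk` method added above does an iterative post-order traversal that dispatches a node only once all of its non-bypassed children have been dispatched, skipping the pre-built subquery subtrees. A standalone sketch of that pattern, using a hypothetical `Node` type rather than Hive's `ASTNode`:

```java
import java.util.*;

// Minimal sketch of ExpressionWalker.walk's bypass-aware post-order walk.
// Node, shouldBypass, and the "SUBQUERY" marker are illustrative assumptions.
final class WalkSketch {
    static final class Node {
        final String name;
        final List<Node> children;
        Node(String name, Node... kids) { this.name = name; this.children = Arrays.asList(kids); }
    }

    // Children of a SUBQUERY node are bypassed, as in shouldByPass() above.
    static boolean shouldBypass(Node child, Node parent) {
        return parent != null && parent.name.equals("SUBQUERY");
    }

    static List<String> walk(Node root) {
        List<String> order = new ArrayList<>();   // dispatch order
        Set<Node> dispatched = new HashSet<>();
        Deque<Node> stack = new ArrayDeque<>();
        stack.push(root);
        while (!stack.isEmpty()) {
            Node node = stack.peek();
            boolean ready = true;
            for (Node child : node.children) {
                if (!dispatched.contains(child) && !shouldBypass(child, node)) {
                    stack.push(child);            // descend into one pending child
                    ready = false;
                    break;
                }
            }
            if (ready) {
                stack.pop();
                if (dispatched.add(node)) order.add(node.name); // post-order dispatch
            }
        }
        return order;
    }

    public static void main(String[] args) {
        Node sub = new Node("SUBQUERY", new Node("innerPlan"));
        Node and = new Node("AND", new Node("col"), sub);
        System.out.println(walk(and)); // prints [col, SUBQUERY, AND]; innerPlan is never dispatched
    }
}
```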
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelShuttle.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelShuttle.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelShuttle.java
new file mode 100644
index 0000000..94a6ea6
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelShuttle.java
@@ -0,0 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite;
+
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.RelShuttle;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+
+/**
+ * Visitor that has methods for the common logical relational expressions.
+ * This is required for HiveRelDecorrelation. Because the plan can contain a
+ * mix of HiveProject, LogicalProject, etc., we need an interface that can
+ * handle all of them.
+public interface HiveRelShuttle extends RelShuttle {
+
+ RelNode visit(HiveProject project);
+ RelNode visit(HiveFilter filter);
+ RelNode visit(HiveJoin join);
+ RelNode visit(HiveAggregate aggregate);
+}
+
+// End HiveRelShuttle.java
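[Editor's note] The interface relies on double dispatch: each relational node's accept method calls back into the visit overload matching its concrete class, which is why overloads for HiveProject, HiveFilter, etc. must exist alongside the Logical ones. A tiny standalone sketch of the mechanism, with hypothetical Rel/Project/Filter classes standing in for Calcite's:

```java
// Double-dispatch sketch of the shuttle idea: each concrete node type
// routes itself to the matching visit() overload, so one shuttle can
// handle a mix of node classes. Rel, Project, and Filter here are
// hypothetical stand-ins, not Calcite's classes.
interface Shuttle {
    String visit(Project project);
    String visit(Filter filter);
}

abstract class Rel {
    // Each subclass calls shuttle.visit(this) with its own static type,
    // so the right overload is selected.
    abstract String accept(Shuttle shuttle);
}

class Project extends Rel {
    @Override String accept(Shuttle shuttle) { return shuttle.visit(this); }
}

class Filter extends Rel {
    @Override String accept(Shuttle shuttle) { return shuttle.visit(this); }
}

class NamingShuttle implements Shuttle {
    public String visit(Project project) { return "project"; }
    public String visit(Filter filter) { return "filter"; }
}
```

In the same way, a HiveFilter's accept resolves to visit(HiveFilter) rather than the LogicalFilter overload, letting one shuttle rewrite plans that mix both families of operators.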
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelShuttleImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelShuttleImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelShuttleImpl.java
new file mode 100644
index 0000000..2aadf50
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelShuttleImpl.java
@@ -0,0 +1,145 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite;
+
+import org.apache.calcite.linq4j.Ord;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.TableFunctionScan;
+import org.apache.calcite.rel.core.TableScan;
+import org.apache.calcite.rel.logical.LogicalAggregate;
+import org.apache.calcite.rel.logical.LogicalCorrelate;
+import org.apache.calcite.rel.logical.LogicalExchange;
+import org.apache.calcite.rel.logical.LogicalFilter;
+import org.apache.calcite.rel.logical.LogicalIntersect;
+import org.apache.calcite.rel.logical.LogicalJoin;
+import org.apache.calcite.rel.logical.LogicalMinus;
+import org.apache.calcite.rel.logical.LogicalProject;
+import org.apache.calcite.rel.logical.LogicalSort;
+import org.apache.calcite.rel.logical.LogicalUnion;
+import org.apache.calcite.rel.logical.LogicalValues;
+import org.apache.calcite.util.Stacks;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class HiveRelShuttleImpl implements HiveRelShuttle {
+ protected final List<RelNode> stack = new ArrayList<RelNode>();
+
+ /**
+ * Visits a particular child of a parent.
+ */
+ protected RelNode visitChild(RelNode parent, int i, RelNode child) {
+ Stacks.push(stack, parent);
+ try {
+ RelNode child2 = child.accept(this);
+ if (child2 != child) {
+ final List<RelNode> newInputs =
+ new ArrayList<RelNode>(parent.getInputs());
+ newInputs.set(i, child2);
+ return parent.copy(parent.getTraitSet(), newInputs);
+ }
+ return parent;
+ } finally {
+ Stacks.pop(stack, parent);
+ }
+ }
+
+ protected RelNode visitChildren(RelNode rel) {
+ for (Ord<RelNode> input : Ord.zip(rel.getInputs())) {
+ rel = visitChild(rel, input.i, input.e);
+ }
+ return rel;
+ }
+
+ public RelNode visit(LogicalAggregate aggregate) {
+ return visitChild(aggregate, 0, aggregate.getInput());
+ }
+
+ public RelNode visit(HiveAggregate aggregate) {
+ return visitChild(aggregate, 0, aggregate.getInput());
+ }
+
+ public RelNode visit(TableScan scan) {
+ return scan;
+ }
+
+ public RelNode visit(TableFunctionScan scan) {
+ return visitChildren(scan);
+ }
+
+ public RelNode visit(LogicalValues values) {
+ return values;
+ }
+
+ public RelNode visit(HiveFilter filter) {
+ return visitChild(filter, 0, filter.getInput());
+ }
+
+ public RelNode visit(LogicalFilter filter) {
+ return visitChild(filter, 0, filter.getInput());
+ }
+
+ public RelNode visit(HiveProject project) {
+ return visitChild(project, 0, project.getInput());
+ }
+
+ public RelNode visit(LogicalProject project) {
+ return visitChild(project, 0, project.getInput());
+ }
+
+ public RelNode visit(LogicalJoin join) {
+ return visitChildren(join);
+ }
+
+ public RelNode visit(HiveJoin join) {
+ return visitChildren(join);
+ }
+
+ public RelNode visit(LogicalCorrelate correlate) {
+ return visitChildren(correlate);
+ }
+
+ public RelNode visit(LogicalUnion union) {
+ return visitChildren(union);
+ }
+
+ public RelNode visit(LogicalIntersect intersect) {
+ return visitChildren(intersect);
+ }
+
+ public RelNode visit(LogicalMinus minus) {
+ return visitChildren(minus);
+ }
+
+ public RelNode visit(LogicalSort sort) {
+ return visitChildren(sort);
+ }
+
+ public RelNode visit(LogicalExchange exchange) {
+ return visitChildren(exchange);
+ }
+
+ public RelNode visit(RelNode other) {
+ return visitChildren(other);
+ }
+}
+
+// End HiveRelShuttleImpl.java
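[Editor's note] visitChild above implements a copy-on-change rewrite: the parent is copied only when a child actually changed, so untouched subtrees are shared between the old and new plans. A self-contained sketch of that pattern, with a hypothetical Tree type in place of RelNode:

```java
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.function.UnaryOperator;

// Copy-on-change rewrite mirroring visitChild(): a parent is copied only
// if one of its inputs was replaced, so unchanged subtrees are shared.
// Tree is a hypothetical stand-in for Calcite's RelNode.
final class CopyOnChange {
    static final class Tree {
        final String label;
        final List<Tree> inputs;
        Tree(String label, Tree... inputs) {
            this.label = label;
            this.inputs = Arrays.asList(inputs);
        }
    }

    static Tree rewrite(Tree node, UnaryOperator<Tree> rule) {
        List<Tree> newInputs = null;
        for (int i = 0; i < node.inputs.size(); i++) {
            Tree child = node.inputs.get(i);
            Tree child2 = rewrite(child, rule);   // rewrite children first
            if (child2 != child) {                // copy the parent lazily
                if (newInputs == null) {
                    newInputs = new ArrayList<>(node.inputs);
                }
                newInputs.set(i, child2);
            }
        }
        Tree current = newInputs == null
            ? node
            : new Tree(node.label, newInputs.toArray(new Tree[0]));
        return rule.apply(current);
    }
}
```

An identity rule returns the very same object, while changing a single leaf replaces only the spine of nodes above it.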
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java
new file mode 100644
index 0000000..e028a99
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java
@@ -0,0 +1,1680 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite;
+
+import org.apache.calcite.linq4j.Ord;
+import org.apache.calcite.plan.Context;
+import org.apache.calcite.plan.Contexts;
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptSchema;
+import org.apache.calcite.plan.RelOptTable;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.rel.RelCollation;
+import org.apache.calcite.rel.RelCollations;
+import org.apache.calcite.rel.RelFieldCollation;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.AggregateCall;
+import org.apache.calcite.rel.core.CorrelationId;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.core.RelFactories;
+import org.apache.calcite.rel.core.Sort;
+import org.apache.calcite.rel.core.TableScan;
+import org.apache.calcite.rel.core.Values;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexCorrelVariable;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.rex.RexShuttle;
+import org.apache.calcite.schema.SchemaPlus;
+import org.apache.calcite.server.CalciteServerStatement;
+import org.apache.calcite.sql.SemiJoinType;
+import org.apache.calcite.sql.SqlAggFunction;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlOperator;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.calcite.sql.type.SqlTypeName;
+import org.apache.calcite.sql.validate.SqlValidatorUtil;
+import org.apache.calcite.tools.RelBuilder;
+import org.apache.calcite.tools.FrameworkConfig;
+import org.apache.calcite.tools.Frameworks;
+import org.apache.calcite.util.CompositeList;
+import org.apache.calcite.util.Holder;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.ImmutableIntList;
+import org.apache.calcite.util.Litmus;
+import org.apache.calcite.util.NlsString;
+import org.apache.calcite.util.Pair;
+import org.apache.calcite.util.Static;
+import org.apache.calcite.util.Util;
+import org.apache.calcite.util.mapping.Mapping;
+import org.apache.calcite.util.mapping.Mappings;
+
+import com.google.common.base.Function;
+import com.google.common.base.Joiner;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+
+import java.math.BigDecimal;
+import java.util.AbstractList;
+import java.util.ArrayDeque;
+import java.util.ArrayList;
+import java.util.Deque;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Objects;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+/**
+ * Builder for relational expressions.
+ * TODO:
+ * Note that this is copied from Calcite's RelBuilder
+ * because CALCITE-1493 hasn't been fixed yet.
+ * This should be deleted and replaced with RelBuilder in SubqueryRemoveRule
+ * once CALCITE-1493 is fixed.
+ *
+ * <p>{@code RelBuilder} does not make possible anything that you could not
+ * also accomplish by calling the factory methods of the particular relational
+ * expression. But it makes common tasks more straightforward and concise.
+ *
+ * <p>{@code RelBuilder} uses factories to create relational expressions.
+ * By default, it uses the default factories, which create logical relational
+ * expressions ({@link org.apache.calcite.rel.logical.LogicalFilter},
+ * {@link org.apache.calcite.rel.logical.LogicalProject} and so forth).
+ * But you could override those factories so that, say, {@code filter} creates
+ * instead a {@code HiveFilter}.
+ *
+ * <p>It is not thread-safe.
+ */
+public class HiveSubQRemoveRelBuilder {
+ private static final Function<RexNode, String> FN_TYPE =
+ new Function<RexNode, String>() {
+ public String apply(RexNode input) {
+ return input + ": " + input.getType();
+ }
+ };
+
+ protected final RelOptCluster cluster;
+ protected final RelOptSchema relOptSchema;
+ private final RelFactories.FilterFactory filterFactory;
+ private final RelFactories.ProjectFactory projectFactory;
+ private final RelFactories.AggregateFactory aggregateFactory;
+ private final RelFactories.SortFactory sortFactory;
+ private final RelFactories.SetOpFactory setOpFactory;
+ private final RelFactories.JoinFactory joinFactory;
+ private final RelFactories.SemiJoinFactory semiJoinFactory;
+ private final RelFactories.CorrelateFactory correlateFactory;
+ private final RelFactories.ValuesFactory valuesFactory;
+ private final RelFactories.TableScanFactory scanFactory;
+ private final Deque<Frame> stack = new ArrayDeque<>();
+
+ public HiveSubQRemoveRelBuilder(Context context, RelOptCluster cluster,
+ RelOptSchema relOptSchema) {
+ this.cluster = cluster;
+ this.relOptSchema = relOptSchema;
+ if (context == null) {
+ context = Contexts.EMPTY_CONTEXT;
+ }
+ this.aggregateFactory =
+ Util.first(context.unwrap(RelFactories.AggregateFactory.class),
+ RelFactories.DEFAULT_AGGREGATE_FACTORY);
+ this.filterFactory =
+ Util.first(context.unwrap(RelFactories.FilterFactory.class),
+ RelFactories.DEFAULT_FILTER_FACTORY);
+ this.projectFactory =
+ Util.first(context.unwrap(RelFactories.ProjectFactory.class),
+ RelFactories.DEFAULT_PROJECT_FACTORY);
+ this.sortFactory =
+ Util.first(context.unwrap(RelFactories.SortFactory.class),
+ RelFactories.DEFAULT_SORT_FACTORY);
+ this.setOpFactory =
+ Util.first(context.unwrap(RelFactories.SetOpFactory.class),
+ RelFactories.DEFAULT_SET_OP_FACTORY);
+ this.joinFactory =
+ Util.first(context.unwrap(RelFactories.JoinFactory.class),
+ RelFactories.DEFAULT_JOIN_FACTORY);
+ this.semiJoinFactory =
+ Util.first(context.unwrap(RelFactories.SemiJoinFactory.class),
+ RelFactories.DEFAULT_SEMI_JOIN_FACTORY);
+ this.correlateFactory =
+ Util.first(context.unwrap(RelFactories.CorrelateFactory.class),
+ RelFactories.DEFAULT_CORRELATE_FACTORY);
+ this.valuesFactory =
+ Util.first(context.unwrap(RelFactories.ValuesFactory.class),
+ RelFactories.DEFAULT_VALUES_FACTORY);
+ this.scanFactory =
+ Util.first(context.unwrap(RelFactories.TableScanFactory.class),
+ RelFactories.DEFAULT_TABLE_SCAN_FACTORY);
+ }
+
+ /** Creates a RelBuilder. */
+ public static HiveSubQRemoveRelBuilder create(FrameworkConfig config) {
+ final RelOptCluster[] clusters = {null};
+ final RelOptSchema[] relOptSchemas = {null};
+ Frameworks.withPrepare(
+ new Frameworks.PrepareAction<Void>(config) {
+ public Void apply(RelOptCluster cluster, RelOptSchema relOptSchema,
+ SchemaPlus rootSchema, CalciteServerStatement statement) {
+ clusters[0] = cluster;
+ relOptSchemas[0] = relOptSchema;
+ return null;
+ }
+ });
+ return new HiveSubQRemoveRelBuilder(config.getContext(), clusters[0], relOptSchemas[0]);
+ }
+
+ /** Returns the type factory. */
+ public RelDataTypeFactory getTypeFactory() {
+ return cluster.getTypeFactory();
+ }
+
+ /** Returns the builder for {@link RexNode} expressions. */
+ public RexBuilder getRexBuilder() {
+ return cluster.getRexBuilder();
+ }
+
+ /** Adds a relational expression to be the input to the next relational
+ * expression constructed.
+ *
+ * <p>This method is useful when you want to weave in relational expressions
+ * that are not supported by the builder. If, while creating such expressions,
+ * you need to use previously built expressions as inputs, call
+ * {@link #build()} to pop those inputs. */
+ public HiveSubQRemoveRelBuilder push(RelNode node) {
+ stack.push(new Frame(node));
+ return this;
+ }
+
+ /** Pushes a collection of relational expressions. */
+ public HiveSubQRemoveRelBuilder pushAll(Iterable<? extends RelNode> nodes) {
+ for (RelNode node : nodes) {
+ push(node);
+ }
+ return this;
+ }
+
+ /** Returns the final relational expression.
+ *
+ * <p>Throws if the stack is empty.
+ */
+ public RelNode build() {
+ return stack.pop().rel;
+ }
+
+ /** Returns the relational expression at the top of the stack, but does not
+ * remove it. */
+ public RelNode peek() {
+ return peek_().rel;
+ }
+
+ private Frame peek_() {
+ return stack.peek();
+ }
+
+ /** Returns the relational expression {@code n} positions from the top of the
+ * stack, but does not remove it. */
+ public RelNode peek(int n) {
+ return peek_(n).rel;
+ }
+
+ private Frame peek_(int n) {
+ return Iterables.get(stack, n);
+ }
+
+ /** Returns the relational expression at ordinal {@code inputOrdinal} among
+ * the top {@code inputCount} inputs on the stack, but does not remove it. */
+ public RelNode peek(int inputCount, int inputOrdinal) {
+ return peek_(inputCount, inputOrdinal).rel;
+ }
+
+ private Frame peek_(int inputCount, int inputOrdinal) {
+ return peek_(inputCount - 1 - inputOrdinal);
+ }
+
+ /** Returns the number of fields in all inputs before (to the left of)
+ * the given input.
+ *
+ * @param inputCount Number of inputs
+ * @param inputOrdinal Input ordinal
+ */
+ private int inputOffset(int inputCount, int inputOrdinal) {
+ int offset = 0;
+ for (int i = 0; i < inputOrdinal; i++) {
+ offset += peek(inputCount, i).getRowType().getFieldCount();
+ }
+ return offset;
+ }
+
+ // Methods that return scalar expressions
+
+ /** Creates a literal (constant expression). */
+ public RexNode literal(Object value) {
+ final RexBuilder rexBuilder = cluster.getRexBuilder();
+ if (value == null) {
+ return rexBuilder.constantNull();
+ } else if (value instanceof Boolean) {
+ return rexBuilder.makeLiteral((Boolean) value);
+ } else if (value instanceof BigDecimal) {
+ return rexBuilder.makeExactLiteral((BigDecimal) value);
+ } else if (value instanceof Float || value instanceof Double) {
+ return rexBuilder.makeApproxLiteral(
+ BigDecimal.valueOf(((Number) value).doubleValue()));
+ } else if (value instanceof Number) {
+ return rexBuilder.makeExactLiteral(
+ BigDecimal.valueOf(((Number) value).longValue()));
+ } else if (value instanceof String) {
+ return rexBuilder.makeLiteral((String) value);
+ } else {
+ throw new IllegalArgumentException("cannot convert " + value
+ + " (" + value.getClass() + ") to a constant");
+ }
+ }
+
+ /** Creates a reference to a field by name.
+ *
+ * <p>Equivalent to {@code field(1, 0, fieldName)}.
+ *
+ * @param fieldName Field name
+ */
+ public RexInputRef field(String fieldName) {
+ return field(1, 0, fieldName);
+ }
+
+ /** Creates a reference to a field of given input relational expression
+ * by name.
+ *
+ * @param inputCount Number of inputs
+ * @param inputOrdinal Input ordinal
+ * @param fieldName Field name
+ */
+ public RexInputRef field(int inputCount, int inputOrdinal, String fieldName) {
+ final Frame frame = peek_(inputCount, inputOrdinal);
+ final List<String> fieldNames = Pair.left(frame.fields());
+ int i = fieldNames.indexOf(fieldName);
+ if (i >= 0) {
+ return field(inputCount, inputOrdinal, i);
+ } else {
+ throw new IllegalArgumentException("field [" + fieldName
+ + "] not found; input fields are: " + fieldNames);
+ }
+ }
+
+ /** Creates a reference to an input field by ordinal.
+ *
+ * <p>Equivalent to {@code field(1, 0, ordinal)}.
+ *
+ * @param fieldOrdinal Field ordinal
+ */
+ public RexInputRef field(int fieldOrdinal) {
+ return (RexInputRef) field(1, 0, fieldOrdinal, false);
+ }
+
+ /** Creates a reference to a field of a given input relational expression
+ * by ordinal.
+ *
+ * @param inputCount Number of inputs
+ * @param inputOrdinal Input ordinal
+ * @param fieldOrdinal Field ordinal within input
+ */
+ public RexInputRef field(int inputCount, int inputOrdinal, int fieldOrdinal) {
+ return (RexInputRef) field(inputCount, inputOrdinal, fieldOrdinal, false);
+ }
+
+ /** As {@link #field(int, int, int)}, but if {@code alias} is true, the method
+ * may apply an alias to make sure that the field has the same name as in the
+ * input frame. If no alias is applied the expression is definitely a
+ * {@link RexInputRef}. */
+ private RexNode field(int inputCount, int inputOrdinal, int fieldOrdinal,
+ boolean alias) {
+ final Frame frame = peek_(inputCount, inputOrdinal);
+ final RelNode input = frame.rel;
+ final RelDataType rowType = input.getRowType();
+ if (fieldOrdinal < 0 || fieldOrdinal >= rowType.getFieldCount()) {
+ throw new IllegalArgumentException("field ordinal [" + fieldOrdinal
+ + "] out of range; input fields are: " + rowType.getFieldNames());
+ }
+ final RelDataTypeField field = rowType.getFieldList().get(fieldOrdinal);
+ final int offset = inputOffset(inputCount, inputOrdinal);
+ final RexInputRef ref = cluster.getRexBuilder()
+ .makeInputRef(field.getType(), offset + fieldOrdinal);
+ final RelDataTypeField aliasField = frame.fields().get(fieldOrdinal);
+ if (!alias || field.getName().equals(aliasField.getName())) {
+ return ref;
+ } else {
+ return alias(ref, aliasField.getName());
+ }
+ }
+
+ /** Creates a reference to a field of the current record which originated
+ * in a relation with a given alias. */
+ public RexNode field(String alias, String fieldName) {
+ Preconditions.checkNotNull(alias);
+ Preconditions.checkNotNull(fieldName);
+ final Frame frame = stack.peek();
+ final List<String> aliases = new ArrayList<>();
+ int offset = 0;
+ for (Pair<String, RelDataType> pair : frame.right) {
+ if (pair.left != null && pair.left.equals(alias)) {
+ int i = pair.right.getFieldNames().indexOf(fieldName);
+ if (i >= 0) {
+ return field(offset + i);
+ } else {
+ throw new IllegalArgumentException("no field '" + fieldName
+ + "' in relation '" + alias
+ + "'; fields are: " + pair.right.getFieldNames());
+ }
+ }
+ aliases.add(pair.left);
+ offset += pair.right.getFieldCount();
+ }
+ throw new IllegalArgumentException("no relation with alias '" + alias
+ + "'; aliases are: " + aliases);
+ }
+
+ /** Returns references to the fields of the top input. */
+ public ImmutableList<RexNode> fields() {
+ return fields(1, 0);
+ }
+
+ /** Returns references to the fields of a given input. */
+ public ImmutableList<RexNode> fields(int inputCount, int inputOrdinal) {
+ final RelNode input = peek(inputCount, inputOrdinal);
+ final RelDataType rowType = input.getRowType();
+ final ImmutableList.Builder<RexNode> nodes = ImmutableList.builder();
+ for (int fieldOrdinal : Util.range(rowType.getFieldCount())) {
+ nodes.add(field(inputCount, inputOrdinal, fieldOrdinal));
+ }
+ return nodes.build();
+ }
+
+ /** Returns references to fields for a given collation. */
+ public ImmutableList<RexNode> fields(RelCollation collation) {
+ final ImmutableList.Builder<RexNode> nodes = ImmutableList.builder();
+ for (RelFieldCollation fieldCollation : collation.getFieldCollations()) {
+ RexNode node = field(fieldCollation.getFieldIndex());
+ switch (fieldCollation.direction) {
+ case DESCENDING:
+ node = desc(node);
+ }
+ switch (fieldCollation.nullDirection) {
+ case FIRST:
+ node = nullsFirst(node);
+ break;
+ case LAST:
+ node = nullsLast(node);
+ break;
+ }
+ nodes.add(node);
+ }
+ return nodes.build();
+ }
+
+ /** Returns references to fields for a given list of input ordinals. */
+ public ImmutableList<RexNode> fields(List<? extends Number> ordinals) {
+ final ImmutableList.Builder<RexNode> nodes = ImmutableList.builder();
+ for (Number ordinal : ordinals) {
+ RexNode node = field(1, 0, ordinal.intValue(), true);
+ nodes.add(node);
+ }
+ return nodes.build();
+ }
+
+ /** Returns references to fields identified by name. */
+ public ImmutableList<RexNode> fields(Iterable<String> fieldNames) {
+ final ImmutableList.Builder<RexNode> builder = ImmutableList.builder();
+ for (String fieldName : fieldNames) {
+ builder.add(field(fieldName));
+ }
+ return builder.build();
+ }
+
+ /** Returns references to fields identified by a mapping. */
+ public ImmutableList<RexNode> fields(Mappings.TargetMapping mapping) {
+ return fields(Mappings.asList(mapping));
+ }
+
+ /** Creates an access to a field by name. */
+ public RexNode dot(RexNode node, String fieldName) {
+ final RexBuilder builder = cluster.getRexBuilder();
+ return builder.makeFieldAccess(node, fieldName, true);
+ }
+
+ /** Creates an access to a field by ordinal. */
+ public RexNode dot(RexNode node, int fieldOrdinal) {
+ final RexBuilder builder = cluster.getRexBuilder();
+ return builder.makeFieldAccess(node, fieldOrdinal);
+ }
+
+ /** Creates a call to a scalar operator. */
+ public RexNode call(SqlOperator operator, RexNode... operands) {
+ final RexBuilder builder = cluster.getRexBuilder();
+ final List<RexNode> operandList = ImmutableList.copyOf(operands);
+ final RelDataType type = builder.deriveReturnType(operator, operandList);
+ if (type == null) {
+ throw new IllegalArgumentException("cannot derive type: " + operator
+ + "; operands: " + Lists.transform(operandList, FN_TYPE));
+ }
+ return builder.makeCall(type, operator, operandList);
+ }
+
+ /** Creates a call to a scalar operator. */
+ public RexNode call(SqlOperator operator,
+ Iterable<? extends RexNode> operands) {
+ return cluster.getRexBuilder().makeCall(operator,
+ ImmutableList.copyOf(operands));
+ }
+
+ /** Creates an AND. */
+ public RexNode and(RexNode... operands) {
+ return and(ImmutableList.copyOf(operands));
+ }
+
+ /** Creates an AND.
+ *
+ * <p>Simplifies the expression a little:
+ * {@code e AND TRUE} becomes {@code e};
+ * {@code e AND e2 AND NOT e} becomes {@code e2}. */
+ public RexNode and(Iterable<? extends RexNode> operands) {
+ return RexUtil.simplifyAnds(cluster.getRexBuilder(), operands);
+ }
+
+ /** Creates an OR. */
+ public RexNode or(RexNode... operands) {
+ return or(ImmutableList.copyOf(operands));
+ }
+
+ /** Creates an OR. */
+ public RexNode or(Iterable<? extends RexNode> operands) {
+ return RexUtil.composeDisjunction(cluster.getRexBuilder(), operands, false);
+ }
+
+ /** Creates a NOT. */
+ public RexNode not(RexNode operand) {
+ return call(SqlStdOperatorTable.NOT, operand);
+ }
+
+ /** Creates an =. */
+ public RexNode equals(RexNode operand0, RexNode operand1) {
+ return call(SqlStdOperatorTable.EQUALS, operand0, operand1);
+ }
+
+ /** Creates an IS NULL. */
+ public RexNode isNull(RexNode operand) {
+ return call(SqlStdOperatorTable.IS_NULL, operand);
+ }
+
+ /** Creates an IS NOT NULL. */
+ public RexNode isNotNull(RexNode operand) {
+ return call(SqlStdOperatorTable.IS_NOT_NULL, operand);
+ }
+
+ /** Creates an expression that casts an expression to a given type. */
+ public RexNode cast(RexNode expr, SqlTypeName typeName) {
+ final RelDataType type = cluster.getTypeFactory().createSqlType(typeName);
+ return cluster.getRexBuilder().makeCast(type, expr);
+ }
+
+ /** Creates an expression that casts an expression to a type with a given name
+ * and precision or length. */
+ public RexNode cast(RexNode expr, SqlTypeName typeName, int precision) {
+ final RelDataType type =
+ cluster.getTypeFactory().createSqlType(typeName, precision);
+ return cluster.getRexBuilder().makeCast(type, expr);
+ }
+
+ /** Creates an expression that casts an expression to a type with a given
+ * name, precision and scale. */
+ public RexNode cast(RexNode expr, SqlTypeName typeName, int precision,
+ int scale) {
+ final RelDataType type =
+ cluster.getTypeFactory().createSqlType(typeName, precision, scale);
+ return cluster.getRexBuilder().makeCast(type, expr);
+ }
+
+ /**
+ * Returns an expression wrapped in an alias.
+ *
+ * @see #project
+ */
+ public RexNode alias(RexNode expr, String alias) {
+ return call(SqlStdOperatorTable.AS, expr, literal(alias));
+ }
+
+ /** Converts a sort expression to descending. */
+ public RexNode desc(RexNode node) {
+ return call(SqlStdOperatorTable.DESC, node);
+ }
+
+ /** Converts a sort expression to nulls last. */
+ public RexNode nullsLast(RexNode node) {
+ return call(SqlStdOperatorTable.NULLS_LAST, node);
+ }
+
+ /** Converts a sort expression to nulls first. */
+ public RexNode nullsFirst(RexNode node) {
+ return call(SqlStdOperatorTable.NULLS_FIRST, node);
+ }
+
+ // Methods that create group keys and aggregate calls
+
+ /** Creates an empty group key. */
+ public GroupKey groupKey() {
+ return groupKey(ImmutableList.<RexNode>of());
+ }
+
+ /** Creates a group key. */
+ public GroupKey groupKey(RexNode... nodes) {
+ return groupKey(ImmutableList.copyOf(nodes));
+ }
+
+ /** Creates a group key. */
+ public GroupKey groupKey(Iterable<? extends RexNode> nodes) {
+ return new GroupKeyImpl(ImmutableList.copyOf(nodes), false, null, null);
+ }
+
+ /** Creates a group key with grouping sets. */
+ public GroupKey groupKey(Iterable<? extends RexNode> nodes, boolean indicator,
+ Iterable<? extends Iterable<? extends RexNode>> nodeLists) {
+ final ImmutableList.Builder<ImmutableList<RexNode>> builder =
+ ImmutableList.builder();
+ for (Iterable<? extends RexNode> nodeList : nodeLists) {
+ builder.add(ImmutableList.copyOf(nodeList));
+ }
+ return new GroupKeyImpl(ImmutableList.copyOf(nodes), indicator, builder.build(), null);
+ }
+
+ /** Creates a group key of fields identified by ordinal. */
+ public GroupKey groupKey(int... fieldOrdinals) {
+ return groupKey(fields(ImmutableIntList.of(fieldOrdinals)));
+ }
+
+ /** Creates a group key of fields identified by name. */
+ public GroupKey groupKey(String... fieldNames) {
+ return groupKey(fields(ImmutableList.copyOf(fieldNames)));
+ }
+
+ /** Creates a group key with grouping sets, both identified by field positions
+ * in the underlying relational expression.
+ *
+ * <p>This method of creating a group key does not allow you to group on new
+ * expressions, only column projections, but is efficient, especially when you
+ * are coming from an existing {@link Aggregate}. */
+ public GroupKey groupKey(ImmutableBitSet groupSet, boolean indicator,
+ ImmutableList<ImmutableBitSet> groupSets) {
+ if (groupSet.length() > peek().getRowType().getFieldCount()) {
+ throw new IllegalArgumentException("out of bounds: " + groupSet);
+ }
+ if (groupSets == null) {
+ groupSets = ImmutableList.of(groupSet);
+ }
+ final ImmutableList<RexNode> nodes =
+ fields(ImmutableIntList.of(groupSet.toArray()));
+ final List<ImmutableList<RexNode>> nodeLists =
+ Lists.transform(groupSets,
+ new Function<ImmutableBitSet, ImmutableList<RexNode>>() {
+ public ImmutableList<RexNode> apply(ImmutableBitSet input) {
+ return fields(ImmutableIntList.of(input.toArray()));
+ }
+ });
+ return groupKey(nodes, indicator, nodeLists);
+ }
+
+ /** Creates a call to an aggregate function. */
+ public AggCall aggregateCall(SqlAggFunction aggFunction, boolean distinct,
+ RexNode filter, String alias, RexNode... operands) {
+ return aggregateCall(aggFunction, distinct, filter, alias,
+ ImmutableList.copyOf(operands));
+ }
+
+ /** Creates a call to an aggregate function. */
+ public AggCall aggregateCall(SqlAggFunction aggFunction, boolean distinct,
+ RexNode filter, String alias, Iterable<? extends RexNode> operands) {
+ if (filter != null) {
+ if (filter.getType().getSqlTypeName() != SqlTypeName.BOOLEAN) {
+ throw Static.RESOURCE.filterMustBeBoolean().ex();
+ }
+ if (filter.getType().isNullable()) {
+ filter = call(SqlStdOperatorTable.IS_TRUE, filter);
+ }
+ }
+ return new AggCallImpl(aggFunction, distinct, filter, alias,
+ ImmutableList.copyOf(operands));
+ }
+
+ /** Creates a call to the COUNT aggregate function. */
+ public AggCall count(boolean distinct, String alias, RexNode... operands) {
+ return aggregateCall(SqlStdOperatorTable.COUNT, distinct, null, alias,
+ operands);
+ }
+
+ /** Creates a call to the COUNT(*) aggregate function. */
+ public AggCall countStar(String alias) {
+ return aggregateCall(SqlStdOperatorTable.COUNT, false, null, alias);
+ }
+
+ /** Creates a call to the SUM aggregate function. */
+ public AggCall sum(boolean distinct, String alias, RexNode operand) {
+ return aggregateCall(SqlStdOperatorTable.SUM, distinct, null, alias,
+ operand);
+ }
+
+ /** Creates a call to the AVG aggregate function. */
+ public AggCall avg(boolean distinct, String alias, RexNode operand) {
+ return aggregateCall(
+ SqlStdOperatorTable.AVG, distinct, null, alias, operand);
+ }
+
+ /** Creates a call to the MIN aggregate function. */
+ public AggCall min(String alias, RexNode operand) {
+ return aggregateCall(SqlStdOperatorTable.MIN, false, null, alias, operand);
+ }
+
+ /** Creates a call to the MAX aggregate function. */
+ public AggCall max(String alias, RexNode operand) {
+ return aggregateCall(SqlStdOperatorTable.MAX, false, null, alias, operand);
+ }
+
+ // Methods that create relational expressions
+
+ /** Creates a {@link org.apache.calcite.rel.core.TableScan} of the table
+ * with a given name.
+ *
+ * <p>Throws if the table does not exist.
+ *
+ * <p>Returns this builder.
+ *
+ * @param tableNames Name of table (can optionally be qualified)
+ */
+ public HiveSubQRemoveRelBuilder scan(Iterable<String> tableNames) {
+ final List<String> names = ImmutableList.copyOf(tableNames);
+ final RelOptTable relOptTable = relOptSchema.getTableForMember(names);
+ if (relOptTable == null) {
+ throw Static.RESOURCE.tableNotFound(Joiner.on(".").join(names)).ex();
+ }
+ final RelNode scan = scanFactory.createScan(cluster, relOptTable);
+ push(scan);
+ return this;
+ }
+
+ /** Creates a {@link org.apache.calcite.rel.core.TableScan} of the table
+ * with a given name.
+ *
+ * <p>Throws if the table does not exist.
+ *
+ * <p>Returns this builder.
+ *
+ * @param tableNames Name of table (can optionally be qualified)
+ */
+ public HiveSubQRemoveRelBuilder scan(String... tableNames) {
+ return scan(ImmutableList.copyOf(tableNames));
+ }
+
+ /** Creates a {@link org.apache.calcite.rel.core.Filter} of an array of
+ * predicates.
+ *
+ * <p>The predicates are combined using AND,
+ * and optimized in a similar way to the {@link #and} method.
+ * If the result is TRUE no filter is created. */
+ public HiveSubQRemoveRelBuilder filter(RexNode... predicates) {
+ return filter(ImmutableList.copyOf(predicates));
+ }
+
+ /** Creates a {@link org.apache.calcite.rel.core.Filter} of a list of
+ * predicates.
+ *
+ * <p>The predicates are combined using AND,
+ * and optimized in a similar way to the {@link #and} method.
+ * If the result is TRUE no filter is created. */
+ public HiveSubQRemoveRelBuilder filter(Iterable<? extends RexNode> predicates) {
+ final RexNode x = RexUtil.simplifyAnds(cluster.getRexBuilder(), predicates, true);
+ if (x.isAlwaysFalse()) {
+ return empty();
+ }
+ if (!x.isAlwaysTrue()) {
+ final Frame frame = stack.pop();
+ final RelNode filter = filterFactory.createFilter(frame.rel, x);
+ stack.push(new Frame(filter, frame.right));
+ }
+ return this;
+ }
+
+
+ /** Creates a {@link org.apache.calcite.rel.core.Project} of the given list
+ * of expressions.
+ *
+ * <p>Infers names as would {@link #project(Iterable, Iterable)} if all
+ * suggested names were null.
+ *
+ * @param nodes Expressions
+ */
+ public HiveSubQRemoveRelBuilder project(Iterable<? extends RexNode> nodes) {
+ return project(nodes, ImmutableList.<String>of());
+ }
+
+ /** Creates a {@link org.apache.calcite.rel.core.Project} of the given list
+ * of expressions and field names.
+ *
+ * <p>Infers names as would {@link #project(Iterable, Iterable)} if all
+ * suggested names were null.
+ *
+ * @param nodes Expressions
+ * @param fieldNames field names for expressions
+ */
+ public HiveSubQRemoveRelBuilder project(Iterable<? extends RexNode> nodes,
+ Iterable<String> fieldNames) {
+ return project(nodes, fieldNames, false);
+ }
+
+ /** Creates a {@link org.apache.calcite.rel.core.Project} of the given list
+ * of expressions, using the given names.
+ *
+ * <p>Names are deduced as follows:
+ * <ul>
+ * <li>If the length of {@code fieldNames} is greater than the index of
+ * the current entry in {@code nodes}, and the entry in
+ * {@code fieldNames} is not null, uses it; otherwise
+ * <li>If an expression projects an input field,
+ * or is a cast of an input field,
+ * uses the input field name; otherwise
+ * <li>If an expression is a call to
+ * {@link org.apache.calcite.sql.fun.SqlStdOperatorTable#AS}
+ * (see {@link #alias}), removes the call but uses the intended alias.
+ * </ul>
+ *
+ * <p>After the field names have been inferred, makes the
+ * field names unique by appending numeric suffixes.
+ *
+ * @param nodes Expressions
+ * @param fieldNames Suggested field names
+ * @param force create project even if it is identity
+ */
+ public HiveSubQRemoveRelBuilder project(
+ Iterable<? extends RexNode> nodes,
+ Iterable<String> fieldNames,
+ boolean force) {
+ final List<String> names = new ArrayList<>();
+ final List<RexNode> exprList = Lists.newArrayList(nodes);
+ final Iterator<String> nameIterator = fieldNames.iterator();
+ for (RexNode node : nodes) {
+ final String name = nameIterator.hasNext() ? nameIterator.next() : null;
+ final String name2 = inferAlias(exprList, node);
+ names.add(Util.first(name, name2));
+ }
+ final RelDataType inputRowType = peek().getRowType();
+ if (!force && RexUtil.isIdentity(exprList, inputRowType)) {
+ if (names.equals(inputRowType.getFieldNames())) {
+ // Do not create an identity project if it does not rename any fields
+ return this;
+ } else {
+ // create a "virtual" row type for a project that only renames fields
+ final Frame frame = stack.pop();
+ final RelDataType rowType =
+ RexUtil.createStructType(cluster.getTypeFactory(), exprList,
+ names, SqlValidatorUtil.F_SUGGESTER);
+ stack.push(
+ new Frame(frame.rel,
+ ImmutableList.of(Pair.of(frame.right.get(0).left, rowType))));
+ return this;
+ }
+ }
+ final RelNode project =
+ projectFactory.createProject(build(), ImmutableList.copyOf(exprList),
+ names);
+ push(project);
+ return this;
+ }
+
+ /** Creates a {@link org.apache.calcite.rel.core.Project} of the given
+ * expressions. */
+ public HiveSubQRemoveRelBuilder project(RexNode... nodes) {
+ return project(ImmutableList.copyOf(nodes));
+ }
+
+ /** Infers the alias of an expression.
+ *
+ * <p>If the expression was created by {@link #alias}, replaces the expression
+ * in the project list.
+ */
+ private String inferAlias(List<RexNode> exprList, RexNode expr) {
+ switch (expr.getKind()) {
+ case INPUT_REF:
+ final RexInputRef ref = (RexInputRef) expr;
+ return peek(0).getRowType().getFieldNames().get(ref.getIndex());
+ case CAST:
+ return inferAlias(exprList, ((RexCall) expr).getOperands().get(0));
+ case AS:
+ final RexCall call = (RexCall) expr;
+ for (;;) {
+ final int i = exprList.indexOf(expr);
+ if (i < 0) {
+ break;
+ }
+ exprList.set(i, call.getOperands().get(0));
+ }
+ return ((NlsString) ((RexLiteral) call.getOperands().get(1)).getValue())
+ .getValue();
+ default:
+ return null;
+ }
+ }
+
+ /** Creates an {@link org.apache.calcite.rel.core.Aggregate} that makes the
+ * relational expression distinct on all fields. */
+ public HiveSubQRemoveRelBuilder distinct() {
+ return aggregate(groupKey(fields()));
+ }
+
+ /** Creates an {@link org.apache.calcite.rel.core.Aggregate} with an array of
+ * calls. */
+ public HiveSubQRemoveRelBuilder aggregate(GroupKey groupKey, AggCall... aggCalls) {
+ return aggregate(groupKey, ImmutableList.copyOf(aggCalls));
+ }
+
+ /** Creates an {@link org.apache.calcite.rel.core.Aggregate} with a list of
+ * calls. */
+ public HiveSubQRemoveRelBuilder aggregate(GroupKey groupKey, Iterable<AggCall> aggCalls) {
+ final RelDataType inputRowType = peek().getRowType();
+ final List<RexNode> extraNodes = projects(inputRowType);
+ final GroupKeyImpl groupKey_ = (GroupKeyImpl) groupKey;
+ final ImmutableBitSet groupSet =
+ ImmutableBitSet.of(registerExpressions(extraNodes, groupKey_.nodes));
+ final ImmutableList<ImmutableBitSet> groupSets;
+ if (groupKey_.nodeLists != null) {
+ final int sizeBefore = extraNodes.size();
+ final SortedSet<ImmutableBitSet> groupSetSet =
+ new TreeSet<>(ImmutableBitSet.ORDERING);
+ for (ImmutableList<RexNode> nodeList : groupKey_.nodeLists) {
+ final ImmutableBitSet groupSet2 =
+ ImmutableBitSet.of(registerExpressions(extraNodes, nodeList));
+ if (!groupSet.contains(groupSet2)) {
+ throw new IllegalArgumentException("group set element " + nodeList
+ + " must be a subset of group key");
+ }
+ groupSetSet.add(groupSet2);
+ }
+ groupSets = ImmutableList.copyOf(groupSetSet);
+ if (extraNodes.size() > sizeBefore) {
+ throw new IllegalArgumentException(
+ "group sets contained expressions not in group key: "
+ + extraNodes.subList(sizeBefore, extraNodes.size()));
+ }
+ } else {
+ groupSets = ImmutableList.of(groupSet);
+ }
+ for (AggCall aggCall : aggCalls) {
+ if (aggCall instanceof AggCallImpl) {
+ final AggCallImpl aggCall1 = (AggCallImpl) aggCall;
+ registerExpressions(extraNodes, aggCall1.operands);
+ if (aggCall1.filter != null) {
+ registerExpression(extraNodes, aggCall1.filter);
+ }
+ }
+ }
+ if (extraNodes.size() > inputRowType.getFieldCount()) {
+ project(extraNodes);
+ }
+ final RelNode r = build();
+ final List<AggregateCall> aggregateCalls = new ArrayList<>();
+ for (AggCall aggCall : aggCalls) {
+ final AggregateCall aggregateCall;
+ if (aggCall instanceof AggCallImpl) {
+ final AggCallImpl aggCall1 = (AggCallImpl) aggCall;
+ final List<Integer> args = registerExpressions(extraNodes, aggCall1.operands);
+ final int filterArg = aggCall1.filter == null ? -1
+ : registerExpression(extraNodes, aggCall1.filter);
+ aggregateCall =
+ AggregateCall.create(aggCall1.aggFunction, aggCall1.distinct, args,
+ filterArg, groupSet.cardinality(), r, null, aggCall1.alias);
+ } else {
+ aggregateCall = ((AggCallImpl2) aggCall).aggregateCall;
+ }
+ aggregateCalls.add(aggregateCall);
+ }
+
+ assert ImmutableBitSet.ORDERING.isStrictlyOrdered(groupSets) : groupSets;
+ for (ImmutableBitSet set : groupSets) {
+ assert groupSet.contains(set);
+ }
+ RelNode aggregate = aggregateFactory.createAggregate(r,
+ groupKey_.indicator, groupSet, groupSets, aggregateCalls);
+ push(aggregate);
+ return this;
+ }
+
+ private List<RexNode> projects(RelDataType inputRowType) {
+ final List<RexNode> exprList = new ArrayList<>();
+ for (RelDataTypeField field : inputRowType.getFieldList()) {
+ final RexBuilder rexBuilder = cluster.getRexBuilder();
+ exprList.add(rexBuilder.makeInputRef(field.getType(), field.getIndex()));
+ }
+ return exprList;
+ }
+
+ private static int registerExpression(List<RexNode> exprList, RexNode node) {
+ int i = exprList.indexOf(node);
+ if (i < 0) {
+ i = exprList.size();
+ exprList.add(node);
+ }
+ return i;
+ }
+
+ private static List<Integer> registerExpressions(List<RexNode> extraNodes,
+ Iterable<? extends RexNode> nodes) {
+ final List<Integer> builder = new ArrayList<>();
+ for (RexNode node : nodes) {
+ builder.add(registerExpression(extraNodes, node));
+ }
+ return builder;
+ }
+
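The `registerExpression` helpers above deduplicate expressions: an expression already in the list keeps its existing ordinal, otherwise it is appended and the new slot is returned. A minimal standalone sketch of that logic (strings stand in for `RexNode`s; names are illustrative, not the Calcite API):

```java
import java.util.ArrayList;
import java.util.List;

public class RegisterDemo {
    /** Returns the ordinal of node in exprs, appending it if absent. */
    static int register(List<String> exprs, String node) {
        int i = exprs.indexOf(node);   // reuse an existing ordinal if present
        if (i < 0) {
            i = exprs.size();          // otherwise append and use the new slot
            exprs.add(node);
        }
        return i;
    }

    public static void main(String[] args) {
        List<String> exprs = new ArrayList<>();
        exprs.add("$0");
        exprs.add("$1");
        int a = register(exprs, "$1");      // already present
        int b = register(exprs, "$0 + $1"); // new expression, appended
        System.out.println(a + " " + b + " " + exprs.size()); // 1 2 3
    }
}
```

This is why group keys and aggregate operands that reference the same expression share one projected column.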
+ private HiveSubQRemoveRelBuilder setOp(boolean all, SqlKind kind, int n) {
+ List<RelNode> inputs = new LinkedList<>();
+ for (int i = 0; i < n; i++) {
+ inputs.add(0, build());
+ }
+ switch (kind) {
+ case UNION:
+ case INTERSECT:
+ case EXCEPT:
+ if (n < 1) {
+ throw new IllegalArgumentException(
+ "bad INTERSECT/UNION/EXCEPT input count");
+ }
+ break;
+ default:
+ throw new AssertionError("bad setOp " + kind);
+ }
+ switch (n) {
+ case 1:
+ return push(inputs.get(0));
+ default:
+ return push(setOpFactory.createSetOp(kind, inputs, all));
+ }
+ }
+
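Note how `setOp` restores input order: the builder's stack yields inputs most-recent-first, so each popped input is prepended (`inputs.add(0, ...)`) to recover left-to-right order. A toy sketch of that stack discipline (strings stand in for `RelNode`s; names are illustrative):

```java
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.LinkedList;
import java.util.List;

public class SetOpOrderDemo {
    /** Pops n inputs off the stack, prepending each so that the
     * first-pushed input ends up first in the result list. */
    static List<String> popInputs(Deque<String> stack, int n) {
        List<String> inputs = new LinkedList<>();
        for (int i = 0; i < n; i++) {
            inputs.add(0, stack.pop()); // last pushed becomes last input
        }
        return inputs;
    }

    public static void main(String[] args) {
        Deque<String> stack = new ArrayDeque<>();
        stack.push("scan(A)");
        stack.push("scan(B)");
        System.out.println(popInputs(stack, 2)); // [scan(A), scan(B)]
    }
}
```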
+ /** Creates a {@link org.apache.calcite.rel.core.Union} of the two most recent
+ * relational expressions on the stack.
+ *
+ * @param all Whether to create UNION ALL
+ */
+ public HiveSubQRemoveRelBuilder union(boolean all) {
+ return union(all, 2);
+ }
+
+ /** Creates a {@link org.apache.calcite.rel.core.Union} of the {@code n}
+ * most recent relational expressions on the stack.
+ *
+ * @param all Whether to create UNION ALL
+ * @param n Number of inputs to the UNION operator
+ */
+ public HiveSubQRemoveRelBuilder union(boolean all, int n) {
+ return setOp(all, SqlKind.UNION, n);
+ }
+
+ /** Creates an {@link org.apache.calcite.rel.core.Intersect} of the two most
+ * recent relational expressions on the stack.
+ *
+ * @param all Whether to create INTERSECT ALL
+ */
+ public HiveSubQRemoveRelBuilder intersect(boolean all) {
+ return intersect(all, 2);
+ }
+
+ /** Creates an {@link org.apache.calcite.rel.core.Intersect} of the {@code n}
+ * most recent relational expressions on the stack.
+ *
+ * @param all Whether to create INTERSECT ALL
+ * @param n Number of inputs to the INTERSECT operator
+ */
+ public HiveSubQRemoveRelBuilder intersect(boolean all, int n) {
+ return setOp(all, SqlKind.INTERSECT, n);
+ }
+
+ /** Creates a {@link org.apache.calcite.rel.core.Minus} of the two most recent
+ * relational expressions on the stack.
+ *
+ * @param all Whether to create EXCEPT ALL
+ */
+ public HiveSubQRemoveRelBuilder minus(boolean all) {
+ return minus(all, 2);
+ }
+
+ /** Creates a {@link org.apache.calcite.rel.core.Minus} of the {@code n}
+ * most recent relational expressions on the stack.
+ *
+ * @param all Whether to create EXCEPT ALL
+ * @param n Number of inputs to the EXCEPT operator
+ */
+ public HiveSubQRemoveRelBuilder minus(boolean all, int n) {
+ return setOp(all, SqlKind.EXCEPT, n);
+ }
+
+ /** Creates a {@link org.apache.calcite.rel.core.Join}. */
+ public HiveSubQRemoveRelBuilder join(JoinRelType joinType, RexNode condition0,
+ RexNode... conditions) {
+ return join(joinType, Lists.asList(condition0, conditions));
+ }
+
+ /** Creates a {@link org.apache.calcite.rel.core.Join} with multiple
+ * conditions. */
+ public HiveSubQRemoveRelBuilder join(JoinRelType joinType,
+ Iterable<? extends RexNode> conditions) {
+ return join(joinType, and(conditions),
+ ImmutableSet.<CorrelationId>of());
+ }
+
+ public HiveSubQRemoveRelBuilder join(JoinRelType joinType, RexNode condition) {
+ return join(joinType, condition, ImmutableSet.<CorrelationId>of());
+ }
+
+ /** Creates a correlation variable for the current input, and writes it into
+ * a Holder. */
+ public HiveSubQRemoveRelBuilder variable(Holder<RexCorrelVariable> v) {
+ v.set((RexCorrelVariable)
+ getRexBuilder().makeCorrel(peek().getRowType(),
+ cluster.createCorrel()));
+ return this;
+ }
+
+ /** Returns a reference to a given field of a record-valued expression. */
+ public RexNode field(RexNode e, String name) {
+ return getRexBuilder().makeFieldAccess(e, name, false);
+ }
+
+ /** Creates a {@link org.apache.calcite.rel.core.Join} with correlating
+ * variables. */
+ public HiveSubQRemoveRelBuilder join(JoinRelType joinType, RexNode condition,
+ Set<CorrelationId> variablesSet) {
+ Frame right = stack.pop();
+ final Frame left = stack.pop();
+ final RelNode join;
+ final boolean correlate = variablesSet.size() == 1;
+ RexNode postCondition = literal(true);
+ if (correlate) {
+ final CorrelationId id = Iterables.getOnlyElement(variablesSet);
+ final ImmutableBitSet requiredColumns =
+ RelOptUtil.correlationColumns(id, right.rel);
+ if (!RelOptUtil.notContainsCorrelation(left.rel, id, Litmus.IGNORE)) {
+ throw new IllegalArgumentException("variable " + id
+ + " must not be used by left input to correlation");
+ }
+ switch (joinType) {
+ case LEFT:
+ // Correlate does not have an ON clause.
+ // For a LEFT correlate, predicate must be evaluated first.
+ // For INNER, we can defer.
+ stack.push(right);
+ filter(condition.accept(new Shifter(left.rel, id, right.rel)));
+ right = stack.pop();
+ break;
+ default:
+ postCondition = condition;
+ }
+ join = correlateFactory.createCorrelate(left.rel, right.rel, id,
+ requiredColumns, SemiJoinType.of(joinType));
+ } else {
+ join = joinFactory.createJoin(left.rel, right.rel, condition,
+ variablesSet, joinType, false);
+ }
+ final List<Pair<String, RelDataType>> pairs = new ArrayList<>();
+ pairs.addAll(left.right);
+ pairs.addAll(right.right);
+ stack.push(new Frame(join, ImmutableList.copyOf(pairs)));
+ filter(postCondition);
+ return this;
+ }
+
+ /** Creates a {@link org.apache.calcite.rel.core.Join} using USING syntax.
+ *
+ * <p>For each of the field names, both left and right inputs must have a
+ * field of that name. Constructs a join condition that the left and right
+ * fields are equal.
+ *
+ * @param joinType Join type
+ * @param fieldNames Field names
+ */
+ public HiveSubQRemoveRelBuilder join(JoinRelType joinType, String... fieldNames) {
+ final List<RexNode> conditions = new ArrayList<>();
+ for (String fieldName : fieldNames) {
+ conditions.add(
+ call(SqlStdOperatorTable.EQUALS,
+ field(2, 0, fieldName),
+ field(2, 1, fieldName)));
+ }
+ return join(joinType, conditions);
+ }
+
+ /** Creates a {@link org.apache.calcite.rel.core.SemiJoin}. */
+ public HiveSubQRemoveRelBuilder semiJoin(Iterable<? extends RexNode> conditions) {
+ final Frame right = stack.pop();
+ final Frame left = stack.pop();
+ final RelNode semiJoin =
+ semiJoinFactory.createSemiJoin(left.rel, right.rel, and(conditions));
+ stack.push(new Frame(semiJoin, left.right));
+ return this;
+ }
+
+ /** Creates a {@link org.apache.calcite.rel.core.SemiJoin}. */
+ public HiveSubQRemoveRelBuilder semiJoin(RexNode... conditions) {
+ return semiJoin(ImmutableList.copyOf(conditions));
+ }
+
+ /** Assigns a table alias to the top entry on the stack. */
+ public HiveSubQRemoveRelBuilder as(String alias) {
+ final Frame pair = stack.pop();
+ stack.push(
+ new Frame(pair.rel,
+ ImmutableList.of(Pair.of(alias, pair.right.get(0).right))));
+ return this;
+ }
+
+ /** Creates a {@link Values}.
+ *
+ * <p>The {@code values} array must have the same number of entries as
+ * {@code fieldNames}, or an integer multiple if you wish to create multiple
+ * rows.
+ *
+ * <p>If there are zero rows, or if all values of any column are
+ * null, this method cannot deduce the type of columns. For these cases,
+ * call {@link #values(Iterable, RelDataType)}.
+ *
+ * @param fieldNames Field names
+ * @param values Values
+ */
+ public HiveSubQRemoveRelBuilder values(String[] fieldNames, Object... values) {
+ if (fieldNames == null
+ || fieldNames.length == 0
+ || values.length % fieldNames.length != 0
+ || values.length < fieldNames.length) {
+ throw new IllegalArgumentException(
+ "Value count must be a positive multiple of field count");
+ }
+ final int rowCount = values.length / fieldNames.length;
+ for (Ord<String> fieldName : Ord.zip(fieldNames)) {
+ if (allNull(values, fieldName.i, fieldNames.length)) {
+ throw new IllegalArgumentException("All values of field '" + fieldName.e
+ + "' are null; cannot deduce type");
+ }
+ }
+ final ImmutableList<ImmutableList<RexLiteral>> tupleList =
+ tupleList(fieldNames.length, values);
+ final RelDataTypeFactory.FieldInfoBuilder rowTypeBuilder =
+ cluster.getTypeFactory().builder();
+ for (final Ord<String> fieldName : Ord.zip(fieldNames)) {
+ final String name =
+ fieldName.e != null ? fieldName.e : "expr$" + fieldName.i;
+ final RelDataType type = cluster.getTypeFactory().leastRestrictive(
+ new AbstractList<RelDataType>() {
+ public RelDataType get(int index) {
+ return tupleList.get(index).get(fieldName.i).getType();
+ }
+
+ public int size() {
+ return rowCount;
+ }
+ });
+ rowTypeBuilder.add(name, type);
+ }
+ final RelDataType rowType = rowTypeBuilder.build();
+ return values(tupleList, rowType);
+ }
+
+ private ImmutableList<ImmutableList<RexLiteral>> tupleList(int columnCount,
+ Object[] values) {
+ final ImmutableList.Builder<ImmutableList<RexLiteral>> listBuilder =
+ ImmutableList.builder();
+ final List<RexLiteral> valueList = new ArrayList<>();
+ for (int i = 0; i < values.length; i++) {
+ Object value = values[i];
+ valueList.add((RexLiteral) literal(value));
+ if ((i + 1) % columnCount == 0) {
+ listBuilder.add(ImmutableList.copyOf(valueList));
+ valueList.clear();
+ }
+ }
+ return listBuilder.build();
+ }
+
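The `tupleList` helper above chunks the flat varargs array of `values(fieldNames, values)` into rows of `columnCount` entries each. A self-contained sketch of the same chunking (plain objects instead of `RexLiteral`s; names are illustrative):

```java
import java.util.ArrayList;
import java.util.List;

public class TupleListDemo {
    /** Splits a flat, row-major value array into rows of columnCount entries. */
    static List<List<Object>> tupleList(int columnCount, Object... values) {
        List<List<Object>> rows = new ArrayList<>();
        List<Object> row = new ArrayList<>();
        for (int i = 0; i < values.length; i++) {
            row.add(values[i]);
            if ((i + 1) % columnCount == 0) { // row is complete
                rows.add(new ArrayList<>(row));
                row.clear();
            }
        }
        return rows;
    }

    public static void main(String[] args) {
        // two columns, four values -> two rows
        System.out.println(tupleList(2, "a", 1, "b", 2)); // [[a, 1], [b, 2]]
    }
}
```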
+ /** Returns whether all values for a given column are null. */
+ private boolean allNull(Object[] values, int column, int columnCount) {
+ for (int i = column; i < values.length; i += columnCount) {
+ if (values[i] != null) {
+ return false;
+ }
+ }
+ return true;
+ }
+
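`allNull` scans one column of the row-major value array with a stride: column `c` of a table with `columnCount` columns occupies indices `c, c + columnCount, c + 2*columnCount, ...`. A minimal sketch of that stride scan:

```java
public class AllNullDemo {
    /** Returns whether every value of the given column is null, walking
     * the flat row-major array with a stride of columnCount. */
    static boolean allNull(Object[] values, int column, int columnCount) {
        for (int i = column; i < values.length; i += columnCount) {
            if (values[i] != null) {
                return false;
            }
        }
        return true;
    }

    public static void main(String[] args) {
        Object[] values = {"a", null, "b", null}; // two columns, two rows
        System.out.println(allNull(values, 1, 2)); // true: column 1 is all null
        System.out.println(allNull(values, 0, 2)); // false: column 0 has values
    }
}
```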
+ /**
+ * An empty relation can be expressed in many different ways, e.g.,
+ * filter(condition=false), an empty LogicalValues, etc. Calcite's default
+ * implementation uses an empty LogicalValues; however, there is currently no
+ * equivalent to that expression in Hive. Thus, we use LIMIT 0, since Hive
+ * already includes optimizations, e.g., GlobalLimitOptimizer, that prune
+ * the result tree early when such a limit is found.
+ */
+ public HiveSubQRemoveRelBuilder empty() {
+ final RelNode input = build();
+ final RelNode sort = HiveRelFactories.HIVE_SORT_FACTORY.createSort(
+ input, RelCollations.of(), null, literal(0));
+ return this.push(sort);
+ }
+
+
+ /** Creates a {@link Values} with a specified row type.
+ *
+ * <p>This method can handle cases that {@link #values(String[], Object...)}
+ * cannot, such as all values of a column being null, or there being zero
+ * rows.
+ *
+ * @param rowType Row type
+ * @param columnValues Values
+ */
+ public HiveSubQRemoveRelBuilder values(RelDataType rowType, Object... columnValues) {
+ final ImmutableList<ImmutableList<RexLiteral>> tupleList =
+ tupleList(rowType.getFieldCount(), columnValues);
+ RelNode values = valuesFactory.createValues(cluster, rowType,
+ ImmutableList.copyOf(tupleList));
+ push(values);
+ return this;
+ }
+
+ /** Creates a {@link Values} with a specified row type.
+ *
+ * <p>This method can handle cases that {@link #values(String[], Object...)}
+ * cannot, such as all values of a column being null, or there being zero
+ * rows.
+ *
+ * @param tupleList Tuple list
+ * @param rowType Row type
+ */
+ public HiveSubQRemoveRelBuilder values(Iterable<? extends List<RexLiteral>> tupleList,
+ RelDataType rowType) {
+ RelNode values =
+ valuesFactory.createValues(cluster, rowType, copy(tupleList));
+ push(values);
+ return this;
+ }
+
+ /** Creates a {@link Values} with a specified row type and
+ * zero rows.
+ *
+ * @param rowType Row type
+ */
+ public HiveSubQRemoveRelBuilder values(RelDataType rowType) {
+ return values(ImmutableList.<ImmutableList<RexLiteral>>of(), rowType);
+ }
+
+ /** Converts an iterable of lists into an immutable list of immutable lists
+ * with the same contents. Returns the same object if possible. */
+ private static <E> ImmutableList<ImmutableList<E>>
+ copy(Iterable<? extends List<E>> tupleList) {
+ final ImmutableList.Builder<ImmutableList<E>> builder =
+ ImmutableList.builder();
+ int changeCount = 0;
+ for (List<E> literals : tupleList) {
+ final ImmutableList<E> literals2 =
+ ImmutableList.copyOf(literals);
+ builder.add(literals2);
+ if (literals != literals2) {
+ ++changeCount;
+ }
+ }
+ if (changeCount == 0) {
+ // don't make a copy if we don't have to
+ //noinspection unchecked
+ return (ImmutableList<ImmutableList<E>>) tupleList;
+ }
+ return builder.build();
+ }
+
+ /** Creates a limit without a sort. */
+ public HiveSubQRemoveRelBuilder limit(int offset, int fetch) {
+ return sortLimit(offset, fetch, ImmutableList.<RexNode>of());
+ }
+
+ /** Creates a {@link Sort} by field ordinals.
+ *
+ * <p>Negative fields mean descending: -1 means field(0) descending,
+ * -2 means field(1) descending, etc.
+ */
+ public HiveSubQRemoveRelBuilder sort(int... fields) {
+ final ImmutableList.Builder<RexNode> builder = ImmutableList.builder();
+ for (int field : fields) {
+ builder.add(field < 0 ? desc(field(-field - 1)) : field(field));
+ }
+ return sortLimit(-1, -1, builder.build());
+ }
+
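The ordinal encoding in `sort(int...)` above packs direction into the sign: a non-negative `n` means `field(n)` ascending, while a negative `n` means `field(-n - 1)` descending (so `-1` is field 0 descending, `-2` is field 1 descending). A small sketch of the decoding:

```java
public class SortOrdinalDemo {
    /** Decodes a sort ordinal into a human-readable collation. */
    static String decode(int field) {
        return field < 0
            ? "field(" + (-field - 1) + ") DESC"  // negative: descending
            : "field(" + field + ") ASC";         // non-negative: ascending
    }

    public static void main(String[] args) {
        System.out.println(decode(0));  // field(0) ASC
        System.out.println(decode(-1)); // field(0) DESC
        System.out.println(decode(-2)); // field(1) DESC
    }
}
```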
+ /** Creates a {@link Sort} by expressions. */
+ public HiveSubQRemoveRelBuilder sort(RexNode... nodes) {
+ return sortLimit(-1, -1, ImmutableList.copyOf(nodes));
+ }
+
+ /** Creates a {@link Sort} by expressions. */
+ public HiveSubQRemoveRelBuilder sort(Iterable<? extends RexNode> nodes) {
+ return sortLimit(-1, -1, nodes);
+ }
+
+ /** Creates a {@link Sort} by expressions, with limit and offset. */
+ public HiveSubQRemoveRelBuilder sortLimit(int offset, int fetch, RexNode... nodes) {
+ return sortLimit(offset, fetch, ImmutableList.copyOf(nodes));
+ }
+
+ /** Creates a {@link Sort} by a list of expressions, with limit and offset.
+ *
+ * @param offset Number of rows to skip; non-positive means don't skip any
+ * @param fetch Maximum number of rows to fetch; negative means no limit
+ * @param nodes Sort expressions
+ */
+ public HiveSubQRemoveRelBuilder sortLimit(int offset, int fetch,
+ Iterable<? extends RexNode> nodes) {
+ final List<RelFieldCollation> fieldCollations = new ArrayList<>();
+ final RelDataType inputRowType = peek().getRowType();
+ final List<RexNode> extraNodes = projects(inputRowType);
+ final List<RexNode> originalExtraNodes = ImmutableList.copyOf(extraNodes);
+ for (RexNode node : nodes) {
+ fieldCollations.add(
+ collation(node, RelFieldCollation.Direction.ASCENDING, null,
+ extraNodes));
+ }
+ final RexNode offsetNode = offset <= 0 ? null : literal(offset);
+ final RexNode fetchNode = fetch < 0 ? null : literal(fetch);
+ if (offsetNode == null && fetch == 0) {
+ return empty();
+ }
+ if (offsetNode == null && fetchNode == null && fieldCollations.isEmpty()) {
+ return this; // sort is trivial
+ }
+
+ final boolean addedFields = extraNodes.size() > originalExtraNodes.size();
+ if (fieldCollations.isEmpty()) {
+ assert !addedFields;
+ RelNode top = peek();
+ if (top instanceof Sort) {
+ final Sort sort2 = (Sort) top;
+ if (sort2.offset == null && sort2.fetch == null) {
+ stack.pop();
+ push(sort2.getInput());
+ final RelNode sort =
+ sortFactory.createSort(build(), sort2.collation,
+ offsetNode, fetchNode);
+ push(sort);
+ return this;
+ }
+ }
+ if (top instanceof Project) {
+ final Project project = (Project) top;
+ if (project.getInput() instanceof Sort) {
+ final Sort sort2 = (Sort) project.getInput();
+ if (sort2.offset == null && sort2.fetch == null) {
+ stack.pop();
+ push(sort2.getInput());
+ final RelNode sort =
+ sortFactory.createSort(build(), sort2.collation,
+ offsetNode, fetchNode);
+ push(sort);
+ project(project.getProjects());
+ return this;
+ }
+ }
+ }
+ }
+ if (addedFields) {
+ project(extraNodes);
+ }
+ final RelNode sort =
+ sortFactory.createSort(build(), RelCollations.of(fieldCollations),
+ offsetNode, fetchNode);
+ push(sort);
+ if (addedFields) {
+ project(originalExtraNodes);
+ }
+ return this;
+ }
+
+ private static RelFieldCollation collation(RexNode node,
+ RelFieldCollation.Direction direction,
+ RelFieldCollation.NullDirection nullDirection, List<RexNode> extraNodes) {
+ switch (node.getKind()) {
+ case INPUT_REF:
+ return new RelFieldCollation(((RexInputRef) node).getIndex(), direction,
+ Util.first(nullDirection, direction.defaultNullDirection()));
+ case DESCENDING:
+ return collation(((RexCall) node).getOperands().get(0),
+ RelFieldCollation.Direction.DESCENDING,
+ nullDirection, extraNodes);
+ case NULLS_FIRST:
+ return collation(((RexCall) node).getOperands().get(0), direction,
+ RelFieldCollation.NullDirection.FIRST, extraNodes);
+ case NULLS_LAST:
+ return collation(((RexCall) node).getOperands().get(0), direction,
+ RelFieldCollation.NullDirection.LAST, extraNodes);
+ default:
+ final int fieldIndex = extraNodes.size();
+ extraNodes.add(node);
+ return new RelFieldCollation(fieldIndex, direction,
+ Util.first(nullDirection, direction.defaultNullDirection()));
+ }
+ }
+
+ /**
+ * Creates a projection that converts the current relational expression's
+ * output to a desired row type.
+ *
+ * @param castRowType row type after cast
+ * @param rename if true, use field names from castRowType; if false,
+ * preserve field names from rel
+ */
+ public HiveSubQRemoveRelBuilder convert(RelDataType castRowType, boolean rename) {
+ final RelNode r = build();
+ final RelNode r2 =
+ RelOptUtil.createCastRel(r, castRowType, rename, projectFactory);
+ push(r2);
+ return this;
+ }
+
+ public HiveSubQRemoveRelBuilder permute(Mapping mapping) {
+ assert mapping.getMappingType().isSingleSource();
+ assert mapping.getMappingType().isMandatorySource();
+ if (mapping.isIdentity()) {
+ return this;
+ }
+ final List<RexNode> exprList = Lists.newArrayList();
+ for (int i = 0; i < mapping.getTargetCount(); i++) {
+ exprList.add(field(mapping.getSource(i)));
+ }
+ return project(exprList);
+ }
+
+ public HiveSubQRemoveRelBuilder aggregate(GroupKey groupKey,
+ List<AggregateCall> aggregateCalls) {
+ return aggregate(groupKey,
+ Lists.transform(
+ aggregateCalls, new Function<AggregateCall, AggCall>() {
+ public AggCall apply(AggregateCall input) {
+ return new AggCallImpl2(input);
+ }
+ }));
+ }
+
+ /** Clears the stack.
+ *
+ * <p>The builder's state is now the same as when it was created. */
+ public void clear() {
+ stack.clear();
+ }
+
+ protected String getAlias() {
+ final Frame frame = stack.peek();
+ return frame.right.size() == 1
+ ? frame.right.get(0).left
+ : null;
+ }
+
+ /** Information necessary to create a call to an aggregate function.
+ *
+ * @see RelBuilder#aggregateCall */
+ public interface AggCall {
+ }
+
+ /** Information necessary to create the GROUP BY clause of an Aggregate.
+ *
+ * @see RelBuilder#groupKey */
+ public interface GroupKey {
+ /** Assigns an alias to this group key.
+ *
+ * <p>Used to assign field names in the {@code group} operation. */
+ GroupKey alias(String alias);
+ }
+
+ /** Implementation of {@link RelBuilder.GroupKey}. */
+ protected static class GroupKeyImpl implements GroupKey {
+ final ImmutableList<RexNode> nodes;
+ final boolean indicator;
+ final ImmutableList<ImmutableList<RexNode>> nodeLists;
+ final String alias;
+
+ GroupKeyImpl(ImmutableList<RexNode> nodes, boolean indicator,
+ ImmutableList<ImmutableList<RexNode>> nodeLists, String alias) {
+ this.nodes = Preconditions.checkNotNull(nodes);
+ this.indicator = indicator;
+ this.nodeLists = nodeLists;
+ this.alias = alias;
+ }
+
+ @Override public String toString() {
+ return alias == null ? nodes.toString() : nodes + " as " + alias;
+ }
+
+ public GroupKey alias(String alias) {
+ return Objects.equals(this.alias, alias)
+ ? this
+ : new GroupKeyImpl(nodes, indicator, nodeLists, alias);
+ }
+ }
+
+ /** Implementation of {@link RelBuilder.AggCall}. */
+ private static class AggCallImpl implements AggCall {
+ private final SqlAggFunction aggFunction;
+ private final boolean distinct;
+ private final RexNode filter;
+ private final String alias;
+ private final ImmutableList<RexNode> operands;
+
+ AggCallImpl(SqlAggFunction aggFunction, boolean distinct, RexNode filter,
+ String alias, ImmutableList<RexNode> operands) {
+ this.aggFunction = aggFunction;
+ this.distinct = distinct;
+ this.filter = filter;
+ this.alias = alias;
+ this.operands = operands;
+ }
+ }
+
+ /** Implementation of {@link RelBuilder.AggCall} that wraps an
+ * {@link AggregateCall}. */
+ private static class AggCallImpl2 implements AggCall {
+ private final AggregateCall aggregateCall;
+
+ AggCallImpl2(AggregateCall aggregateCall) {
+ this.aggregateCall = Preconditions.checkNotNull(aggregateCall);
+ }
+ }
+
+ /** Builder stack frame.
+ *
+ * <p>Describes a previously created relational expression and
+ * information about how table aliases map into its row type. */
+ private static class Frame {
+ static final Function<Pair<String, RelDataType>, List<RelDataTypeField>> FN =
+ new Function<Pair<String, RelDataType>, List<RelDataTypeField>>() {
+ public List<RelDataTypeField> apply(Pair<String, RelDataType> input) {
+ return input.right.getFieldList();
+ }
+ };
+
+ final RelNode rel;
+ final ImmutableList<Pair<String, RelDataType>> right;
+
+ private Frame(RelNode rel, ImmutableList<Pair<String, RelDataType>> pairs) {
+ this.rel = rel;
+ this.right = pairs;
+ }
+
+ private Frame(RelNode rel) {
+ this(rel, ImmutableList.of(Pair.of(deriveAlias(rel), rel.getRowType())));
+ }
+
+ private static String deriveAlias(RelNode rel) {
+ if (rel instanceof TableScan) {
+ final List<String> names = rel.getTable().getQualifiedName();
+ if (!names.isEmpty()) {
+ return Util.last(names);
+ }
+ }
+ return null;
+ }
+
+ List<RelDataTypeField> fields() {
+ return CompositeList.ofCopy(Iterables.transform(right, FN));
+ }
+ }
+
+ /** Shuttle that shifts a predicate's inputs to the left, replacing early
+ * ones with references to a
+ * {@link org.apache.calcite.rex.RexCorrelVariable}. */
+ private class Shifter extends RexShuttle {
+ private final RelNode left;
+ private final CorrelationId id;
+ private final RelNode right;
+
+ Shifter(RelNode left, CorrelationId id, RelNode right) {
+ this.left = left;
+ this.id = id;
+ this.right = right;
+ }
+
+ public RexNode visitInputRef(RexInputRef inputRef) {
+ final RelDataType leftRowType = left.getRowType();
+ final RexBuilder rexBuilder = getRexBuilder();
+ final int leftCount = leftRowType.getFieldCount();
+ if (inputRef.getIndex() < leftCount) {
+ final RexNode v = rexBuilder.makeCorrel(leftRowType, id);
+ return rexBuilder.makeFieldAccess(v, inputRef.getIndex());
+ } else {
+ return rexBuilder.makeInputRef(right, inputRef.getIndex() - leftCount);
+ }
+ }
+ }
+}
+
+// End RelBuilder.java
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java
index d5fa856..ce207da 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java
@@ -20,10 +20,19 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
import org.apache.calcite.plan.RelOptCluster;
import org.apache.calcite.plan.RelTraitSet;
import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.RelShuttle;
import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexCorrelVariable;
+import org.apache.calcite.rex.RexFieldAccess;
+import org.apache.calcite.rex.RexSubQuery;
import org.apache.calcite.rex.RexNode;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelShuttle;
import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
-
+import org.apache.calcite.rel.core.CorrelationId;
+import java.util.Set;
+import java.util.HashSet;
+
public class HiveFilter extends Filter implements HiveRelNode {
public HiveFilter(RelOptCluster cluster, RelTraitSet traits, RelNode child, RexNode condition) {
@@ -40,4 +49,65 @@ public class HiveFilter extends Filter implements HiveRelNode {
public void implement(Implementor implementor) {
}
+ private void findCorrelatedVar(RexNode node, Set<CorrelationId> allVars) {
+ if(node instanceof RexCall) {
+ RexCall nd = (RexCall)node;
+ for (RexNode rn : nd.getOperands()) {
+ if (rn instanceof RexFieldAccess) {
+ final RexNode ref = ((RexFieldAccess) rn).getReferenceExpr();
+ if (ref instanceof RexCorrelVariable) {
+ allVars.add(((RexCorrelVariable) ref).id);
+ }
+ } else {
+ findCorrelatedVar(rn, allVars);
+ }
+ }
+ }
+ }
+
+ // Traverse the given node to find all correlated variables.
+ // Note that correlated variables are supported in Filter only, i.e. WHERE & HAVING.
+ private void traverseFilter(RexNode node, Set<CorrelationId> allVars) {
+ if(node instanceof RexSubQuery) {
+ // We expect correlated variables in HiveFilter only for now.
+ // Also check for the case where an operator has 0 inputs, e.g. TableScan.
+ RelNode input = ((RexSubQuery)node).rel.getInput(0);
+ while(input != null && !(input instanceof HiveFilter)
+ && input.getInputs().size() >=1) {
+ // We don't expect correlated variables within JOIN or UNION for now;
+ // we only expect them in the top-level filter.
+ if(input.getInputs().size() > 1) {
+ return;
+ }
+ input = input.getInput(0);
+ }
+ if(input != null && input instanceof HiveFilter) {
+ findCorrelatedVar(((HiveFilter)input).getCondition(), allVars);
+ }
+ return;
+ }
+ //AND, NOT etc
+ if(node instanceof RexCall) {
+ int numOperands = ((RexCall)node).getOperands().size();
+ for(int i=0; i<numOperands; i++) {
+ RexNode op = ((RexCall)node).getOperands().get(i);
+ traverseFilter(op, allVars);
+ }
+ }
+ }
+
+ @Override
+ public Set<CorrelationId> getVariablesSet() {
+ Set<CorrelationId> allCorrVars = new HashSet<>();
+ traverseFilter(condition, allCorrVars);
+ return allCorrVars;
+ }
+
+ public RelNode accept(RelShuttle shuttle) {
+ if (shuttle instanceof HiveRelShuttle) {
+ return ((HiveRelShuttle)shuttle).visit(this);
+ }
+ return shuttle.visit(this);
+ }
+
}
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java
index dc2fa86..5b67d9d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java
@@ -34,6 +34,7 @@ import org.apache.calcite.rel.RelWriter;
import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.JoinRelType;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rel.RelShuttle;
import org.apache.calcite.rel.type.RelDataTypeField;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.util.ImmutableBitSet;
@@ -42,6 +43,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelShuttle;
import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCostModel.JoinAlgorithm;
import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveDefaultCostModel.DefaultJoinAlgorithm;
@@ -221,4 +223,12 @@ public class HiveJoin extends Join implements HiveRelNode {
.item("cost", joinCost == null ?
"not available" : joinCost);
}
+
+ // Required for HiveRelDecorrelator.
+ public RelNode accept(RelShuttle shuttle) {
+ if (shuttle instanceof HiveRelShuttle) {
+ return ((HiveRelShuttle)shuttle).visit(this);
+ }
+ return shuttle.visit(this);
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientpositive/llap/subquery_exists.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/subquery_exists.q.out b/ql/src/test/results/clientpositive/llap/subquery_exists.q.out
index 1a006d8..b132cb6 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_exists.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_exists.q.out
@@ -33,7 +33,10 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+ Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
+ Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
+ Reducer 7 <- Map 6 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -41,8 +44,24 @@ STAGE PLANS:
TableScan
alias: b
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: ((value > 'val_9') and key is not null) (type: boolean)
+ predicate: (value > 'val_9') (type: boolean)
Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), value (type: string)
@@ -55,28 +74,21 @@ STAGE PLANS:
Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
- Map 3
+ Map 6
Map Operator Tree:
TableScan
- alias: a
+ alias: b
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: ((value > 'val_9') and key is not null) (type: boolean)
- Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: string), _col1 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: key (type: string), value (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Reducer 2
@@ -84,19 +96,66 @@ STAGE PLANS:
Reduce Operator Tree:
Merge Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col0 (type: string), _col1 (type: string)
1 _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string), _col1 (type: string)
+ 1 _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col2, _col3
+ Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col2 (type: string), _col3 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 5
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 7
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
Stage: Stage-0
Fetch Operator
@@ -244,7 +303,10 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+ Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
+ Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
+ Reducer 7 <- Map 6 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -252,19 +314,16 @@ STAGE PLANS:
TableScan
alias: b
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: value is not null (type: boolean)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: string)
+ value expressions: _col0 (type: string)
Execution mode: llap
LLAP IO: no inputs
Map 3
@@ -272,23 +331,32 @@ STAGE PLANS:
TableScan
alias: a
Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: value is not null (type: boolean)
+ Select Operator
+ expressions: value (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: value (type: string)
- outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: value (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Reducer 2
@@ -296,7 +364,7 @@ STAGE PLANS:
Reduce Operator Tree:
Merge Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col1 (type: string)
1 _col0 (type: string)
@@ -309,6 +377,53 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1
+ Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 5
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 7
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE
Stage: Stage-0
Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientpositive/perf/query70.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query70.q.out b/ql/src/test/results/clientpositive/perf/query70.q.out
index 581fd9b..c3ef2a8 100644
--- a/ql/src/test/results/clientpositive/perf/query70.q.out
+++ b/ql/src/test/results/clientpositive/perf/query70.q.out
@@ -75,92 +75,61 @@ POSTHOOK: type: QUERY
Plan optimized by CBO.
Vertex dependency in root stage
-Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE)
-Reducer 12 <- Map 16 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE)
+Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE)
+Reducer 12 <- Map 17 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE)
Reducer 13 <- Reducer 12 (SIMPLE_EDGE)
Reducer 14 <- Reducer 13 (SIMPLE_EDGE)
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
-Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
-Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+Reducer 15 <- Reducer 14 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE)
+Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
-Reducer 9 <- Map 8 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE)
+Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
Stage-0
Fetch Operator
limit:100
Stage-1
- Reducer 6
- File Output Operator [FS_63]
- Limit [LIM_62] (rows=100 width=88)
+ Reducer 7
+ File Output Operator [FS_64]
+ Limit [LIM_63] (rows=100 width=88)
Number of rows:100
- Select Operator [SEL_61] (rows=1045432122 width=88)
+ Select Operator [SEL_62] (rows=1149975358 width=88)
Output:["_col0","_col1","_col2","_col3","_col4"]
- <-Reducer 5 [SIMPLE_EDGE]
- SHUFFLE [RS_60]
- Select Operator [SEL_58] (rows=1045432122 width=88)
+ <-Reducer 6 [SIMPLE_EDGE]
+ SHUFFLE [RS_61]
+ Select Operator [SEL_59] (rows=1149975358 width=88)
Output:["_col0","_col1","_col2","_col3","_col4"]
- PTF Operator [PTF_57] (rows=1045432122 width=88)
+ PTF Operator [PTF_58] (rows=1149975358 width=88)
Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col4 DESC NULLS LAST","partition by:":"(grouping(_col5, 0) + grouping(_col5, 1)), CASE WHEN ((UDFToInteger(grouping(_col5, 1)) = 0)) THEN (_col0) ELSE (null) END"}]
- Select Operator [SEL_56] (rows=1045432122 width=88)
+ Select Operator [SEL_57] (rows=1149975358 width=88)
Output:["_col0","_col1","_col4","_col5"]
- <-Reducer 4 [SIMPLE_EDGE]
- SHUFFLE [RS_55]
+ <-Reducer 5 [SIMPLE_EDGE]
+ SHUFFLE [RS_56]
PartitionCols:(grouping(_col5, 0) + grouping(_col5, 1)), CASE WHEN ((UDFToInteger(grouping(_col5, 1)) = 0)) THEN (_col0) ELSE (null) END
- Select Operator [SEL_54] (rows=1045432122 width=88)
+ Select Operator [SEL_55] (rows=1149975358 width=88)
Output:["_col0","_col1","_col4","_col5"]
- Group By Operator [GBY_53] (rows=1045432122 width=88)
+ Group By Operator [GBY_54] (rows=1149975358 width=88)
Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2
- <-Reducer 3 [SIMPLE_EDGE]
- SHUFFLE [RS_52]
+ <-Reducer 4 [SIMPLE_EDGE]
+ SHUFFLE [RS_53]
PartitionCols:_col0, _col1, _col2
- Group By Operator [GBY_51] (rows=2090864244 width=88)
+ Group By Operator [GBY_52] (rows=2299950717 width=88)
Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)"],keys:_col0, _col1, 0
- Select Operator [SEL_49] (rows=696954748 width=88)
+ Select Operator [SEL_50] (rows=766650239 width=88)
Output:["_col0","_col1","_col2"]
- Merge Join Operator [MERGEJOIN_92] (rows=696954748 width=88)
- Conds:RS_46._col1=RS_47._col0(Inner),Output:["_col2","_col6","_col7"]
- <-Reducer 2 [SIMPLE_EDGE]
- SHUFFLE [RS_46]
- PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_88] (rows=633595212 width=88)
- Conds:RS_43._col0=RS_44._col0(Inner),Output:["_col1","_col2"]
- <-Map 1 [SIMPLE_EDGE]
- SHUFFLE [RS_43]
- PartitionCols:_col0
- Select Operator [SEL_2] (rows=575995635 width=88)
- Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_81] (rows=575995635 width=88)
- predicate:(ss_sold_date_sk is not null and ss_store_sk is not null)
- TableScan [TS_0] (rows=575995635 width=88)
- default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"]
- <-Map 7 [SIMPLE_EDGE]
- SHUFFLE [RS_44]
- PartitionCols:_col0
- Select Operator [SEL_5] (rows=8116 width=1119)
- Output:["_col0"]
- Filter Operator [FIL_82] (rows=8116 width=1119)
- predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null)
- TableScan [TS_3] (rows=73049 width=1119)
- default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"]
- <-Reducer 9 [SIMPLE_EDGE]
- SHUFFLE [RS_47]
+ Merge Join Operator [MERGEJOIN_92] (rows=766650239 width=88)
+ Conds:RS_47._col7=RS_48._col0(Inner),Output:["_col2","_col6","_col7"]
+ <-Reducer 15 [SIMPLE_EDGE]
+ SHUFFLE [RS_48]
PartitionCols:_col0
- Merge Join Operator [MERGEJOIN_91] (rows=127775039 width=88)
- Conds:RS_39._col2=RS_40._col0(Left Semi),Output:["_col0","_col1","_col2"]
- <-Map 8 [SIMPLE_EDGE]
- SHUFFLE [RS_39]
- PartitionCols:_col2
- Select Operator [SEL_8] (rows=1704 width=1910)
- Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_83] (rows=1704 width=1910)
- predicate:(s_state is not null and s_store_sk is not null)
- TableScan [TS_6] (rows=1704 width=1910)
- default@store,s,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_county","s_state"]
+ Group By Operator [GBY_39] (rows=58079562 width=88)
+ Output:["_col0"],keys:KEY._col0
<-Reducer 14 [SIMPLE_EDGE]
- SHUFFLE [RS_40]
+ SHUFFLE [RS_38]
PartitionCols:_col0
- Group By Operator [GBY_38] (rows=116159124 width=88)
+ Group By Operator [GBY_37] (rows=116159124 width=88)
Output:["_col0"],keys:_col0
Select Operator [SEL_32] (rows=116159124 width=88)
Output:["_col0"]
@@ -182,21 +151,21 @@ Stage-0
Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col6
Select Operator [SEL_24] (rows=696954748 width=88)
Output:["_col6","_col2"]
- Merge Join Operator [MERGEJOIN_90] (rows=696954748 width=88)
+ Merge Join Operator [MERGEJOIN_91] (rows=696954748 width=88)
Conds:RS_21._col1=RS_22._col0(Inner),Output:["_col2","_col6"]
- <-Map 16 [SIMPLE_EDGE]
+ <-Map 17 [SIMPLE_EDGE]
SHUFFLE [RS_22]
PartitionCols:_col0
Select Operator [SEL_17] (rows=1704 width=1910)
Output:["_col0","_col1"]
Filter Operator [FIL_87] (rows=1704 width=1910)
- predicate:(s_store_sk is not null and s_state is not null)
+ predicate:s_store_sk is not null
TableScan [TS_15] (rows=1704 width=1910)
default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"]
<-Reducer 11 [SIMPLE_EDGE]
SHUFFLE [RS_21]
PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_89] (rows=633595212 width=88)
+ Merge Join Operator [MERGEJOIN_90] (rows=633595212 width=88)
Conds:RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col2"]
<-Map 10 [SIMPLE_EDGE]
SHUFFLE [RS_18]
@@ -207,7 +176,7 @@ Stage-0
predicate:(ss_store_sk is not null and ss_sold_date_sk is not null)
TableScan [TS_9] (rows=575995635 width=88)
default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"]
- <-Map 15 [SIMPLE_EDGE]
+ <-Map 16 [SIMPLE_EDGE]
SHUFFLE [RS_19]
PartitionCols:_col0
Select Operator [SEL_14] (rows=8116 width=1119)
@@ -216,4 +185,41 @@ Stage-0
predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null)
TableScan [TS_12] (rows=73049 width=1119)
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"]
+ <-Reducer 3 [SIMPLE_EDGE]
+ SHUFFLE [RS_47]
+ PartitionCols:_col7
+ Merge Join Operator [MERGEJOIN_89] (rows=696954748 width=88)
+ Conds:RS_44._col1=RS_45._col0(Inner),Output:["_col2","_col6","_col7"]
+ <-Map 9 [SIMPLE_EDGE]
+ SHUFFLE [RS_45]
+ PartitionCols:_col0
+ Select Operator [SEL_8] (rows=1704 width=1910)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_83] (rows=1704 width=1910)
+ predicate:s_store_sk is not null
+ TableScan [TS_6] (rows=1704 width=1910)
+ default@store,s,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_county","s_state"]
+ <-Reducer 2 [SIMPLE_EDGE]
+ SHUFFLE [RS_44]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_88] (rows=633595212 width=88)
+ Conds:RS_41._col0=RS_42._col0(Inner),Output:["_col1","_col2"]
+ <-Map 1 [SIMPLE_EDGE]
+ SHUFFLE [RS_41]
+ PartitionCols:_col0
+ Select Operator [SEL_2] (rows=575995635 width=88)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_81] (rows=575995635 width=88)
+ predicate:(ss_sold_date_sk is not null and ss_store_sk is not null)
+ TableScan [TS_0] (rows=575995635 width=88)
+ default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"]
+ <-Map 8 [SIMPLE_EDGE]
+ SHUFFLE [RS_42]
+ PartitionCols:_col0
+ Select Operator [SEL_5] (rows=8116 width=1119)
+ Output:["_col0"]
+ Filter Operator [FIL_82] (rows=8116 width=1119)
+ predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null)
+ TableScan [TS_3] (rows=73049 width=1119)
+ default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"]
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientpositive/semijoin4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/semijoin4.q.out b/ql/src/test/results/clientpositive/semijoin4.q.out
index 3c065a9..89e4023 100644
--- a/ql/src/test/results/clientpositive/semijoin4.q.out
+++ b/ql/src/test/results/clientpositive/semijoin4.q.out
@@ -56,9 +56,10 @@ WHERE (t2.tinyint_col_21) IN (
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1, Stage-5
+ Stage-2 depends on stages: Stage-1, Stage-6
Stage-3 depends on stages: Stage-2
Stage-5 is a root stage
+ Stage-6 depends on stages: Stage-5
Stage-0 depends on stages: Stage-3
STAGE PLANS:
@@ -69,7 +70,7 @@ STAGE PLANS:
alias: t1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: ((UDFToInteger(tinyint_col_46) = -92) and decimal1309_col_65 is not null and bigint_col_13 is not null) (type: boolean)
+ predicate: (decimal1309_col_65 is not null and bigint_col_13 is not null and tinyint_col_46 is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: bigint_col_13 (type: bigint), smallint_col_24 (type: smallint), tinyint_col_46 (type: tinyint), double_col_60 (type: double), decimal1309_col_65 (type: decimal(13,9))
@@ -85,7 +86,7 @@ STAGE PLANS:
alias: t2
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: ((UDFToInteger(tinyint_col_21) = -92) and tinyint_col_18 is not null and decimal2709_col_9 is not null) (type: boolean)
+ predicate: (tinyint_col_18 is not null and tinyint_col_21 is not null and decimal2709_col_9 is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: decimal2709_col_9 (type: decimal(27,9)), tinyint_col_18 (type: tinyint), tinyint_col_21 (type: tinyint)
@@ -105,27 +106,23 @@ STAGE PLANS:
1 _col2 (type: tinyint), _col0 (type: decimal(27,9)), UDFToLong(_col1) (type: bigint)
outputColumnNames: _col1, _col3, _col7
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: _col1 (type: smallint), _col3 (type: double), _col7 (type: tinyint), UDFToInteger(_col7) (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-2
Map Reduce
Map Operator Tree:
TableScan
Reduce Output Operator
- key expressions: _col3 (type: int)
+ key expressions: UDFToInteger(_col7) (type: int)
sort order: +
- Map-reduce partition columns: _col3 (type: int)
+ Map-reduce partition columns: UDFToInteger(_col7) (type: int)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- value expressions: _col0 (type: smallint), _col1 (type: double), _col2 (type: tinyint)
+ value expressions: _col1 (type: smallint), _col3 (type: double), _col7 (type: tinyint)
TableScan
Reduce Output Operator
key expressions: _col0 (type: int)
@@ -135,11 +132,11 @@ STAGE PLANS:
Reduce Operator Tree:
Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
- 0 _col3 (type: int)
+ 0 UDFToInteger(_col7) (type: int)
1 _col0 (type: int)
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col1, _col3, _col7
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
@@ -153,27 +150,27 @@ STAGE PLANS:
Map Operator Tree:
TableScan
Reduce Output Operator
- key expressions: (UDFToShort(_col2) + _col0) (type: smallint), floor(_col1) (type: bigint)
+ key expressions: (UDFToShort(_col7) + _col1) (type: smallint), floor(_col3) (type: bigint)
sort order: +-
- Map-reduce partition columns: (UDFToShort(_col2) + _col0) (type: smallint)
+ Map-reduce partition columns: (UDFToShort(_col7) + _col1) (type: smallint)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- value expressions: _col0 (type: smallint), _col1 (type: double), _col2 (type: tinyint)
+ value expressions: _col1 (type: smallint), _col3 (type: double), _col7 (type: tinyint)
Reduce Operator Tree:
Select Operator
- expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: double), VALUE._col2 (type: tinyint)
- outputColumnNames: _col0, _col1, _col2
+ expressions: VALUE._col1 (type: smallint), VALUE._col3 (type: double), VALUE._col7 (type: tinyint)
+ outputColumnNames: _col1, _col3, _col7
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
- output shape: _col0: smallint, _col1: double, _col2: tinyint
+ output shape: _col1: smallint, _col3: double, _col7: tinyint
type: WINDOWING
Windowing table definition
input alias: ptf_1
name: windowingtablefunction
- order by: (UDFToShort(_col2) + _col0) ASC NULLS FIRST, floor(_col1) DESC NULLS LAST
- partition by: (UDFToShort(_col2) + _col0)
+ order by: (UDFToShort(_col7) + _col1) ASC NULLS FIRST, floor(_col3) DESC NULLS LAST
+ partition by: (UDFToShort(_col7) + _col1)
raw input shape:
window functions:
window function definition
@@ -237,21 +234,39 @@ STAGE PLANS:
0 _col0 (type: decimal(19,11)), _col1 (type: timestamp)
1 _col0 (type: decimal(19,11)), _col1 (type: timestamp)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: -92 (type: int)
+ Group By Operator
+ keys: -92 (type: int)
+ mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Group By Operator
- keys: _col0 (type: int)
- mode: hash
- outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientpositive/semijoin5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/semijoin5.q.out b/ql/src/test/results/clientpositive/semijoin5.q.out
index 63b477c..20d372a 100644
--- a/ql/src/test/results/clientpositive/semijoin5.q.out
+++ b/ql/src/test/results/clientpositive/semijoin5.q.out
@@ -48,10 +48,14 @@ WHERE (t2.smallint_col_19) IN (SELECT
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1, Stage-6
+ Stage-2 depends on stages: Stage-1, Stage-8
Stage-3 depends on stages: Stage-2
Stage-4 depends on stages: Stage-3
- Stage-6 is a root stage
+ Stage-9 is a root stage
+ Stage-10 depends on stages: Stage-9
+ Stage-6 depends on stages: Stage-10
+ Stage-7 depends on stages: Stage-6
+ Stage-8 depends on stages: Stage-7
Stage-0 depends on stages: Stage-4
STAGE PLANS:
@@ -62,7 +66,7 @@ STAGE PLANS:
alias: t1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: (tinyint_col_3 is not null and bigint_col_7 is not null and decimal2016_col_26 is not null and timestamp_col_9 is not null) (type: boolean)
+ predicate: (tinyint_col_3 is not null and bigint_col_7 is not null and decimal2016_col_26 is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: tinyint_col_3 (type: tinyint), bigint_col_7 (type: bigint), timestamp_col_9 (type: timestamp), double_col_16 (type: double), decimal2016_col_26 (type: decimal(20,16)), smallint_col_50 (type: smallint)
@@ -78,7 +82,7 @@ STAGE PLANS:
alias: t2
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: ((UDFToInteger(smallint_col_19) = -92) and tinyint_col_20 is not null and decimal2709_col_9 is not null and tinyint_col_15 is not null) (type: boolean)
+ predicate: (tinyint_col_20 is not null and decimal2709_col_9 is not null and tinyint_col_15 is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: decimal2709_col_9 (type: decimal(27,9)), int_col_10 (type: int), tinyint_col_15 (type: tinyint), smallint_col_19 (type: smallint), tinyint_col_20 (type: tinyint)
@@ -99,41 +103,37 @@ STAGE PLANS:
1 _col4 (type: tinyint), _col0 (type: decimal(34,16)), UDFToLong(_col2) (type: bigint)
outputColumnNames: _col2, _col3, _col5, _col7, _col9
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: _col2 (type: timestamp), _col3 (type: double), _col5 (type: smallint), _col7 (type: int), UDFToInteger(_col9) (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-2
Map Reduce
Map Operator Tree:
TableScan
Reduce Output Operator
- key expressions: _col0 (type: timestamp), _col4 (type: int)
+ key expressions: _col2 (type: timestamp), UDFToInteger(_col9) (type: int)
sort order: ++
- Map-reduce partition columns: _col0 (type: timestamp), _col4 (type: int)
+ Map-reduce partition columns: _col2 (type: timestamp), UDFToInteger(_col9) (type: int)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- value expressions: _col1 (type: double), _col2 (type: smallint), _col3 (type: int)
+ value expressions: _col3 (type: double), _col5 (type: smallint), _col7 (type: int)
TableScan
Reduce Output Operator
- key expressions: _col0 (type: timestamp), _col1 (type: int)
+ key expressions: _col1 (type: timestamp), _col0 (type: int)
sort order: ++
- Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: int)
+ Map-reduce partition columns: _col1 (type: timestamp), _col0 (type: int)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Reduce Operator Tree:
Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
- 0 _col0 (type: timestamp), _col4 (type: int)
- 1 _col0 (type: timestamp), _col1 (type: int)
- outputColumnNames: _col1, _col2, _col3
+ 0 _col2 (type: timestamp), UDFToInteger(_col9) (type: int)
+ 1 _col1 (type: timestamp), _col0 (type: int)
+ outputColumnNames: _col3, _col5, _col7
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
@@ -147,27 +147,27 @@ STAGE PLANS:
Map Operator Tree:
TableScan
Reduce Output Operator
- key expressions: (_col3 + UDFToInteger(_col2)) (type: int), floor(_col1) (type: bigint)
+ key expressions: (_col7 + UDFToInteger(_col5)) (type: int), floor(_col3) (type: bigint)
sort order: +-
- Map-reduce partition columns: (_col3 + UDFToInteger(_col2)) (type: int)
+ Map-reduce partition columns: (_col7 + UDFToInteger(_col5)) (type: int)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- value expressions: _col1 (type: double), _col2 (type: smallint), _col3 (type: int)
+ value expressions: _col3 (type: double), _col5 (type: smallint), _col7 (type: int)
Reduce Operator Tree:
Select Operator
- expressions: VALUE._col1 (type: double), VALUE._col2 (type: smallint), VALUE._col3 (type: int)
- outputColumnNames: _col1, _col2, _col3
+ expressions: VALUE._col3 (type: double), VALUE._col5 (type: smallint), VALUE._col7 (type: int)
+ outputColumnNames: _col3, _col5, _col7
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
- output shape: _col1: double, _col2: smallint, _col3: int
+ output shape: _col3: double, _col5: smallint, _col7: int
type: WINDOWING
Windowing table definition
input alias: ptf_1
name: windowingtablefunction
- order by: (_col3 + UDFToInteger(_col2)) ASC NULLS FIRST, floor(_col1) DESC NULLS LAST
- partition by: (_col3 + UDFToInteger(_col2))
+ order by: (_col7 + UDFToInteger(_col5)) ASC NULLS FIRST, floor(_col3) DESC NULLS LAST
+ partition by: (_col7 + UDFToInteger(_col5))
raw input shape:
window functions:
window function definition
@@ -179,8 +179,8 @@ STAGE PLANS:
isPivotResult: true
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
- expressions: LEAD_window_0 (type: int), _col1 (type: double), _col2 (type: smallint), _col3 (type: int)
- outputColumnNames: LEAD_window_0, _col1, _col2, _col3
+ expressions: LEAD_window_0 (type: int), _col3 (type: double), _col5 (type: smallint), _col7 (type: int)
+ outputColumnNames: LEAD_window_0, _col3, _col5, _col7
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
@@ -194,27 +194,27 @@ STAGE PLANS:
Map Operator Tree:
TableScan
Reduce Output Operator
- key expressions: (_col3 + UDFToInteger(_col2)) (type: int), floor(_col1) (type: bigint)
+ key expressions: (_col7 + UDFToInteger(_col5)) (type: int), floor(_col3) (type: bigint)
sort order: --
- Map-reduce partition columns: (_col3 + UDFToInteger(_col2)) (type: int)
+ Map-reduce partition columns: (_col7 + UDFToInteger(_col5)) (type: int)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- value expressions: LEAD_window_0 (type: int), _col1 (type: double), _col2 (type: smallint), _col3 (type: int)
+ value expressions: LEAD_window_0 (type: int), _col3 (type: double), _col5 (type: smallint), _col7 (type: int)
Reduce Operator Tree:
Select Operator
- expressions: VALUE._col0 (type: int), VALUE._col2 (type: double), VALUE._col3 (type: smallint), VALUE._col4 (type: int)
- outputColumnNames: _col0, _col2, _col3, _col4
+ expressions: VALUE._col0 (type: int), VALUE._col4 (type: double), VALUE._col6 (type: smallint), VALUE._col8 (type: int)
+ outputColumnNames: _col0, _col4, _col6, _col8
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
- output shape: _col0: int, _col2: double, _col3: smallint, _col4: int
+ output shape: _col0: int, _col4: double, _col6: smallint, _col8: int
type: WINDOWING
Windowing table definition
input alias: ptf_1
name: windowingtablefunction
- order by: (_col4 + UDFToInteger(_col3)) DESC NULLS LAST, floor(_col2) DESC NULLS LAST
- partition by: (_col4 + UDFToInteger(_col3))
+ order by: (_col8 + UDFToInteger(_col6)) DESC NULLS LAST, floor(_col4) DESC NULLS LAST
+ partition by: (_col8 + UDFToInteger(_col6))
raw input shape:
window functions:
window function definition
@@ -225,7 +225,7 @@ STAGE PLANS:
window frame: PRECEDING(MAX)~FOLLOWING(48)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
- expressions: COALESCE(498,_col0,524) (type: int), (_col4 + UDFToInteger(_col3)) (type: int), floor(_col2) (type: bigint), COALESCE(sum_window_1,704) (type: bigint)
+ expressions: COALESCE(498,_col0,524) (type: int), (_col8 + UDFToInteger(_col6)) (type: int), floor(_col4) (type: bigint), COALESCE(sum_window_1,704) (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
File Output Operator
@@ -236,40 +236,149 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Stage: Stage-6
+ Stage: Stage-9
Map Reduce
Map Operator Tree:
TableScan
- alias: tt1
+ alias: t1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: decimal2612_col_77 is not null (type: boolean)
+ predicate: (tinyint_col_3 is not null and bigint_col_7 is not null and decimal2016_col_26 is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
- expressions: decimal2612_col_77 (type: decimal(26,12))
- outputColumnNames: _col0
+ expressions: tinyint_col_3 (type: tinyint), bigint_col_7 (type: bigint), timestamp_col_9 (type: timestamp), decimal2016_col_26 (type: decimal(20,16))
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: decimal(26,12))
- sort order: +
- Map-reduce partition columns: _col0 (type: decimal(26,12))
+ key expressions: _col0 (type: tinyint), _col3 (type: decimal(34,16)), _col1 (type: bigint)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: tinyint), _col3 (type: decimal(34,16)), _col1 (type: bigint)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ value expressions: _col2 (type: timestamp)
+ TableScan
+ alias: t2
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: (tinyint_col_20 is not null and decimal2709_col_9 is not null and tinyint_col_15 is not null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: decimal2709_col_9 (type: decimal(27,9)), tinyint_col_15 (type: tinyint), tinyint_col_20 (type: tinyint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: tinyint), _col0 (type: decimal(34,16)), UDFToLong(_col1) (type: bigint)
+ sort order: +++
+ Map-reduce partition columns: _col2 (type: tinyint), _col0 (type: decimal(34,16)), UDFToLong(_col1) (type: bigint)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: tinyint), _col3 (type: decimal(34,16)), _col1 (type: bigint)
+ 1 _col2 (type: tinyint), _col0 (type: decimal(34,16)), UDFToLong(_col1) (type: bigint)
+ outputColumnNames: _col2
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ keys: _col2 (type: timestamp)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-10
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: timestamp)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: timestamp)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: timestamp)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
TableScan
alias: tt2
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: (decimal1911_col_16 is not null and timestamp_col_18 is not null) (type: boolean)
+ predicate: decimal1911_col_16 is not null (type: boolean)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: decimal1911_col_16 (type: decimal(19,11)), timestamp_col_18 (type: timestamp)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
+ key expressions: _col1 (type: timestamp)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: timestamp)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ value expressions: _col0 (type: decimal(19,11))
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: timestamp)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: timestamp)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: timestamp)
+ 1 _col0 (type: timestamp)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-7
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: decimal(26,12))
+ sort order: +
+ Map-reduce partition columns: _col0 (type: decimal(26,12))
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ value expressions: _col2 (type: timestamp)
+ TableScan
+ alias: tt1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: decimal2612_col_77 is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: decimal2612_col_77 (type: decimal(26,12))
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
key expressions: _col0 (type: decimal(26,12))
sort order: +
Map-reduce partition columns: _col0 (type: decimal(26,12))
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- value expressions: _col1 (type: timestamp)
Reduce Operator Tree:
Join Operator
condition map:
@@ -280,11 +389,11 @@ STAGE PLANS:
outputColumnNames: _col2
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
- expressions: _col2 (type: timestamp), -92 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: _col2 (type: timestamp)
+ outputColumnNames: _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Group By Operator
- keys: _col0 (type: timestamp), _col1 (type: int)
+ keys: _col1 (type: timestamp), -92 (type: int)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
@@ -295,6 +404,32 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Stage: Stage-8
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: timestamp), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: int)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: timestamp), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), _col0 (type: timestamp)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
Stage: Stage-0
Fetch Operator
limit: -1
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out b/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out
index 567c6d3..a40115c 100644
--- a/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out
+++ b/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out
@@ -95,7 +95,10 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 3 (PARTITION-LEVEL SORT, 4)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Reducer 5 (PARTITION-LEVEL SORT, 4)
+ Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 4), Reducer 7 (PARTITION-LEVEL SORT, 4)
+ Reducer 5 <- Reducer 4 (GROUP, 4)
+ Reducer 7 <- Map 6 (GROUP, 4)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -104,7 +107,7 @@ STAGE PLANS:
alias: li
Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((l_linenumber = 1) and l_orderkey is not null) (type: boolean)
+ predicate: (l_linenumber = 1) (type: boolean)
Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int)
@@ -122,27 +125,42 @@ STAGE PLANS:
alias: lineitem
Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((l_shipmode = 'AIR') and (l_linenumber = 1) and l_orderkey is not null) (type: boolean)
- Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
+ predicate: (l_shipmode = 'AIR') (type: boolean)
+ Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: l_orderkey (type: int), 1 (type: int)
+ expressions: l_orderkey (type: int), l_linenumber (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: int), _col1 (type: int)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: int)
- sort order: ++
- Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: li
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: l_linenumber (type: int)
+ outputColumnNames: l_linenumber
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: l_linenumber (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col0 (type: int), 1 (type: int)
1 _col0 (type: int), _col1 (type: int)
@@ -159,6 +177,50 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col3
+ Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int), _col3 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 27 Data size: 3239 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 27 Data size: 3239 Basic stats: COMPLETE Column stats: NONE
+ Reducer 7
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
Stage: Stage-0
Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientpositive/spark/subquery_exists.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/subquery_exists.q.out b/ql/src/test/results/clientpositive/spark/subquery_exists.q.out
index b58fcbe..c28a218 100644
--- a/ql/src/test/results/clientpositive/spark/subquery_exists.q.out
+++ b/ql/src/test/results/clientpositive/spark/subquery_exists.q.out
@@ -32,7 +32,10 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2)
+ Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2)
+ Reducer 5 <- Reducer 4 (GROUP, 2)
+ Reducer 7 <- Map 6 (GROUP, 2)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -40,8 +43,22 @@ STAGE PLANS:
TableScan
alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((value > 'val_9') and key is not null) (type: boolean)
+ predicate: (value > 'val_9') (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
@@ -52,45 +69,86 @@ STAGE PLANS:
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Map 3
+ Map 6
Map Operator Tree:
TableScan
- alias: a
+ alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((value > 'val_9') and key is not null) (type: boolean)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: key (type: string), value (type: string)
+ mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string), _col1 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col0 (type: string), _col1 (type: string)
1 _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string), _col1 (type: string)
+ 1 _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col2, _col3
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col2 (type: string), _col3 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+ Reducer 7
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Stage: Stage-0
Fetch Operator
@@ -237,7 +295,10 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2)
+ Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2)
+ Reducer 5 <- Reducer 4 (GROUP, 2)
+ Reducer 7 <- Map 6 (GROUP, 2)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -245,46 +306,54 @@ STAGE PLANS:
TableScan
alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: value is not null (type: boolean)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string)
+ value expressions: _col0 (type: string)
Map 3
Map Operator Tree:
TableScan
alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: value is not null (type: boolean)
+ Select Operator
+ expressions: value (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: value (type: string)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string)
+ outputColumnNames: value
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: value (type: string)
+ mode: hash
outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col1 (type: string)
1 _col0 (type: string)
@@ -297,6 +366,50 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Reducer 7
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Stage: Stage-0
Fetch Operator
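[Editorial note, not part of the commit] The recurring plan change in these diffs is `Left Semi Join` becoming `Inner Join` fed by an extra `Group By` on the subquery side. That is the standard de-correlation rewrite: deduplicating the subquery's join key first lets a plain inner join return each qualifying outer row exactly once, which matches IN-subquery (semi-join) semantics. A minimal sketch of that equivalence, using Python's sqlite3 with illustrative data (not Hive's actual `src` table):

```python
import sqlite3

# Sketch of the rewrite seen in the plans above: an IN subquery
# (Hive's Left Semi Join) is equivalent to an Inner Join against the
# deduplicated subquery side (Group By / DISTINCT on the join key).
# Table contents here are illustrative, not Hive's src table.
conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE src (key TEXT, value TEXT)")
conn.executemany("INSERT INTO src VALUES (?, ?)",
                 [("90", "val_90"), ("90", "val_90"), ("92", "val_92"),
                  ("95", "val_95"), ("8", "val_8")])

# Semi-join semantics: each qualifying outer row appears exactly once
# per its own multiplicity, regardless of duplicates in the subquery.
in_rows = conn.execute(
    "SELECT * FROM src WHERE key IN "
    "(SELECT key FROM src WHERE key > '9') ORDER BY key, value").fetchall()

# The de-correlated form: DISTINCT (Group By) first, then an inner join.
join_rows = conn.execute(
    "SELECT a.* FROM src a JOIN "
    "(SELECT DISTINCT key FROM src WHERE key > '9') b "
    "ON a.key = b.key ORDER BY a.key, a.value").fetchall()

assert in_rows == join_rows
```

Without the DISTINCT, duplicate keys on the subquery side would multiply outer rows, which is why the rewritten plans insert the hash/mergepartial `Group By` stages before the join.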
[10/21] hive git commit: HIVE-15192 : Use Calcite to de-correlate and plan subqueries (Vineet Garg via Ashutosh Chauhan)
Posted by ha...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
index 76c8404..0fdcf53 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
@@ -3052,17 +3052,17 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 6 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 5 <- Map 4 (SIMPLE_EDGE), Union 6 (CONTAINS)
- Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 6 (CONTAINS)
+ Reducer 7 <- Union 6 (SIMPLE_EDGE)
+ Reducer 9 <- Map 8 (SIMPLE_EDGE), Union 6 (CONTAINS)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ds is not null (type: boolean)
Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ds (type: string)
@@ -3095,7 +3095,7 @@ STAGE PLANS:
value expressions: _col0 (type: string)
Execution mode: llap
LLAP IO: no inputs
- Map 7
+ Map 8
Map Operator Tree:
TableScan
alias: srcpart
@@ -3120,7 +3120,7 @@ STAGE PLANS:
Reduce Operator Tree:
Merge Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
@@ -3157,35 +3157,45 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: _col0 is not null (type: boolean)
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 7
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
+ Dynamic Partitioning Event Operator
+ Target column: ds (string)
+ Target Input: srcpart
+ Partition key expr: ds
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Dynamic Partitioning Event Operator
- Target column: ds (string)
- Target Input: srcpart
- Partition key expr: ds
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Target Vertex: Map 1
- Reducer 8
+ Target Vertex: Map 1
+ Reducer 9
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -3193,34 +3203,16 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: _col0 is not null (type: boolean)
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Dynamic Partitioning Event Operator
- Target column: ds (string)
- Target Input: srcpart
- Partition key expr: ds
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Target Vertex: Map 1
Union 6
Vertex: Union 6
@@ -3260,17 +3252,17 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 6 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 5 <- Map 4 (SIMPLE_EDGE), Union 6 (CONTAINS)
- Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 6 (CONTAINS)
+ Reducer 7 <- Union 6 (SIMPLE_EDGE)
+ Reducer 9 <- Map 8 (SIMPLE_EDGE), Union 6 (CONTAINS)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ds is not null (type: boolean)
Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ds (type: string)
@@ -3303,7 +3295,7 @@ STAGE PLANS:
value expressions: _col0 (type: string)
Execution mode: llap
LLAP IO: no inputs
- Map 7
+ Map 8
Map Operator Tree:
TableScan
alias: srcpart
@@ -3328,7 +3320,7 @@ STAGE PLANS:
Reduce Operator Tree:
Merge Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
@@ -3367,35 +3359,45 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: _col0 is not null (type: boolean)
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 7
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
+ Dynamic Partitioning Event Operator
+ Target column: ds (string)
+ Target Input: srcpart
+ Partition key expr: ds
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Dynamic Partitioning Event Operator
- Target column: ds (string)
- Target Input: srcpart
- Partition key expr: ds
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Target Vertex: Map 1
- Reducer 8
+ Target Vertex: Map 1
+ Reducer 9
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -3403,34 +3405,16 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: _col0 is not null (type: boolean)
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Dynamic Partitioning Event Operator
- Target column: ds (string)
- Target Input: srcpart
- Partition key expr: ds
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Target Vertex: Map 1
Union 6
Vertex: Union 6
@@ -3471,9 +3455,10 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 11 <- Map 10 (SIMPLE_EDGE), Union 9 (CONTAINS)
+ Reducer 10 <- Union 9 (SIMPLE_EDGE)
+ Reducer 12 <- Map 11 (SIMPLE_EDGE), Union 9 (CONTAINS)
Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS)
- Reducer 4 <- Union 3 (SIMPLE_EDGE), Union 9 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Union 3 (SIMPLE_EDGE)
Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS)
Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 9 (CONTAINS)
#### A masked pattern was here ####
@@ -3482,7 +3467,6 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ds is not null (type: boolean)
Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: ds (type: string)
@@ -3496,7 +3480,7 @@ STAGE PLANS:
Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
- Map 10
+ Map 11
Map Operator Tree:
TableScan
alias: srcpart
@@ -3520,7 +3504,6 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ds is not null (type: boolean)
Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: ds (type: string)
@@ -3554,57 +3537,67 @@ STAGE PLANS:
value expressions: _col0 (type: string)
Execution mode: llap
LLAP IO: no inputs
- Reducer 11
+ Reducer 10
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
- aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: _col0 is not null (type: boolean)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Dynamic Partitioning Event Operator
+ Target column: ds (string)
+ Target Input: srcpart
+ Partition key expr: ds
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Dynamic Partitioning Event Operator
- Target column: ds (string)
- Target Input: srcpart
- Partition key expr: ds
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Target Vertex: Map 1
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
+ Target Vertex: Map 1
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Dynamic Partitioning Event Operator
+ Target column: ds (string)
+ Target Input: srcpart
+ Partition key expr: ds
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Dynamic Partitioning Event Operator
- Target column: ds (string)
- Target Input: srcpart
- Partition key expr: ds
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Target Vertex: Map 5
+ Target Vertex: Map 5
+ Reducer 12
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
Reducer 2
Execution mode: vectorized, llap
Reduce Operator Tree:
@@ -3623,7 +3616,7 @@ STAGE PLANS:
Reduce Operator Tree:
Merge Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
@@ -3657,49 +3650,16 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: _col0 is not null (type: boolean)
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Dynamic Partitioning Event Operator
- Target column: ds (string)
- Target Input: srcpart
- Partition key expr: ds
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Target Vertex: Map 1
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Dynamic Partitioning Event Operator
- Target column: ds (string)
- Target Input: srcpart
- Partition key expr: ds
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Target Vertex: Map 5
Union 3
Vertex: Union 3
Union 9
@@ -5318,45 +5278,29 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Union 5 (BROADCAST_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
- Reducer 4 <- Map 3 (SIMPLE_EDGE), Union 5 (CONTAINS)
- Reducer 7 <- Map 6 (SIMPLE_EDGE), Union 5 (CONTAINS)
+ Reducer 3 <- Map 2 (SIMPLE_EDGE), Union 4 (CONTAINS)
+ Reducer 5 <- Map 1 (BROADCAST_EDGE), Union 4 (SIMPLE_EDGE)
+ Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
+ Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 4 (CONTAINS)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ds is not null (type: boolean)
Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
- condition map:
- Left Semi Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0
- input vertices:
- 1 Union 5
- Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
- Map 3
+ Map 2
Map Operator Tree:
TableScan
alias: srcpart
@@ -5376,7 +5320,7 @@ STAGE PLANS:
value expressions: _col0 (type: string)
Execution mode: llap
LLAP IO: no inputs
- Map 6
+ Map 7
Map Operator Tree:
TableScan
alias: srcpart
@@ -5396,32 +5340,42 @@ STAGE PLANS:
value expressions: _col0 (type: string)
Execution mode: llap
LLAP IO: no inputs
- Reducer 2
+ Reducer 3
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
- keys: KEY._col0 (type: string)
+ aggregations: max(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 4
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 5
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
- aggregations: max(VALUE._col0)
+ keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: _col0 is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0
+ input vertices:
+ 0 Map 1
+ Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: _col0 (type: string)
mode: hash
@@ -5432,22 +5386,22 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Dynamic Partitioning Event Operator
- Target column: ds (string)
- Target Input: srcpart
- Partition key expr: ds
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Target Vertex: Map 1
- Reducer 7
+ Reducer 6
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 8
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -5455,36 +5409,18 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: _col0 is not null (type: boolean)
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Dynamic Partitioning Event Operator
- Target column: ds (string)
- Target Input: srcpart
- Partition key expr: ds
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Target Vertex: Map 1
- Union 5
- Vertex: Union 5
+ Union 4
+ Vertex: Union 4
Stage: Stage-0
Fetch Operator
[11/21] hive git commit: HIVE-15192 : Use Calcite to de-correlate and plan subqueries (Vineet Garg via Ashutosh Chauhan)
Posted by ha...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientpositive/llap/subquery_shared_alias.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/subquery_shared_alias.q.out b/ql/src/test/results/clientpositive/llap/subquery_shared_alias.q.out
new file mode 100644
index 0000000..0507ab7
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/subquery_shared_alias.q.out
@@ -0,0 +1,23 @@
+PREHOOK: query: select *
+from src
+where src.key in (select key from src where key > '9')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from src
+where src.key in (select key from src where key > '9')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+90 val_90
+90 val_90
+90 val_90
+92 val_92
+95 val_95
+95 val_95
+96 val_96
+97 val_97
+97 val_97
+98 val_98
+98 val_98
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientpositive/llap/subquery_views.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/subquery_views.q.out b/ql/src/test/results/clientpositive/llap/subquery_views.q.out
index 4cd0485..cf963b2 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_views.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_views.q.out
@@ -111,8 +111,6 @@ where `b`.`key` not in
from `default`.`src` `a`
where `b`.`value` = `a`.`value` and `a`.`key` = `b`.`key` and `a`.`value` > 'val_11'
), tableType:VIRTUAL_VIEW, rewriteEnabled:false)
-Warning: Shuffle Join MERGEJOIN[67][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
-Warning: Shuffle Join MERGEJOIN[69][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 9' is a cross product
PREHOOK: query: explain
select *
from cv2 where cv2.key in (select key from cv2 c where c.key < '11')
@@ -130,13 +128,28 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 10 <- Map 13 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
- Reducer 12 <- Map 11 (SIMPLE_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
- Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
- Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
- Reducer 6 <- Map 5 (SIMPLE_EDGE)
- Reducer 9 <- Map 8 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE)
+ Reducer 11 <- Map 10 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE)
+ Reducer 12 <- Reducer 11 (SIMPLE_EDGE)
+ Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE)
+ Reducer 15 <- Map 14 (SIMPLE_EDGE)
+ Reducer 17 <- Map 16 (SIMPLE_EDGE)
+ Reducer 19 <- Map 18 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
+ Reducer 20 <- Reducer 19 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE)
+ Reducer 21 <- Reducer 20 (SIMPLE_EDGE)
+ Reducer 23 <- Map 22 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE)
+ Reducer 24 <- Reducer 23 (SIMPLE_EDGE)
+ Reducer 26 <- Map 25 (SIMPLE_EDGE)
+ Reducer 28 <- Map 27 (SIMPLE_EDGE), Reducer 32 (SIMPLE_EDGE)
+ Reducer 29 <- Reducer 28 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+ Reducer 30 <- Reducer 29 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE)
+ Reducer 32 <- Map 31 (SIMPLE_EDGE)
+ Reducer 34 <- Map 33 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 21 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
+ Reducer 6 <- Map 5 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
+ Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
+ Reducer 9 <- Map 8 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -154,112 +167,340 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- sort order:
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: string), _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
- Map 11
+ Map 10
Map Operator Tree:
TableScan
alias: a
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: ((value > 'val_11') and (key is null or value is null)) (type: boolean)
- Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: (value > 'val_11') (type: boolean)
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint)
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
- Map 13
+ Map 14
Map Operator Tree:
TableScan
- alias: a
+ alias: b
+ properties:
+ insideView TRUE
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: key (type: string), value (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 16
+ Map Operator Tree:
+ TableScan
+ alias: b
+ properties:
+ insideView TRUE
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 18
+ Map Operator Tree:
+ TableScan
+ alias: b
+ properties:
+ insideView TRUE
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: ((value > 'val_11') and (key < '11')) (type: boolean)
- Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: (key < '11') (type: boolean)
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: key (type: string), value (type: string), key (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 55 Data size: 14575 Basic stats: COMPLETE Column stats: COMPLETE
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col2 (type: string), _col1 (type: string), _col0 (type: string)
- sort order: +++
- Map-reduce partition columns: _col2 (type: string), _col1 (type: string), _col0 (type: string)
- Statistics: Num rows: 55 Data size: 14575 Basic stats: COMPLETE Column stats: COMPLETE
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
- Map 5
+ Map 22
Map Operator Tree:
TableScan
alias: a
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: ((value > 'val_11') and (key is null or value is null)) (type: boolean)
- Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: (value > 'val_11') (type: boolean)
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint)
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 25
+ Map Operator Tree:
+ TableScan
+ alias: b
+ properties:
+ insideView TRUE
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: key (type: string), value (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
- Map 7
+ Map 27
Map Operator Tree:
TableScan
alias: a
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: ((value > 'val_11') and (key < '11')) (type: boolean)
- Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: (value > 'val_11') (type: boolean)
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: key (type: string), value (type: string), key (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 55 Data size: 14575 Basic stats: COMPLETE Column stats: COMPLETE
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col2 (type: string), _col1 (type: string), _col0 (type: string)
- sort order: +++
- Map-reduce partition columns: _col2 (type: string), _col1 (type: string), _col0 (type: string)
- Statistics: Num rows: 55 Data size: 14575 Basic stats: COMPLETE Column stats: COMPLETE
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
- Map 8
+ Map 31
+ Map Operator Tree:
+ TableScan
+ alias: b
+ properties:
+ insideView TRUE
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: key (type: string), value (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 33
Map Operator Tree:
TableScan
alias: b
properties:
insideView TRUE
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: a
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (key < '11') (type: boolean)
+ predicate: (value > 'val_11') (type: boolean)
Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- sort order:
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: string), _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
- Reducer 10
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: b
+ properties:
+ insideView TRUE
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: key (type: string), value (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 11
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string), _col1 (type: string)
+ 1 _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col2, _col3
+ Statistics: Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: string), _col2 (type: string), _col3 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ Statistics: Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 12
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), true (type: boolean)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 269 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 269 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: boolean)
+ Reducer 13
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 269 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col4 (type: string), _col2 (type: string), _col1 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col4 (type: string), _col2 (type: string), _col1 (type: string)
+ Statistics: Num rows: 1 Data size: 269 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col3 (type: boolean)
+ Reducer 15
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 17
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 19
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: string), _col1 (type: string)
+ 1 _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1, _col4, _col5
+ Statistics: Num rows: 166 Data size: 32204 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col0 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col0 (type: string)
+ Statistics: Num rows: 166 Data size: 32204 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col4 (type: bigint), _col5 (type: bigint)
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: string), _col1 (type: string)
+ 1 _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1, _col4, _col5
+ Statistics: Num rows: 166 Data size: 32204 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col0 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col0 (type: string)
+ Statistics: Num rows: 166 Data size: 32204 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col4 (type: bigint), _col5 (type: bigint)
+ Reducer 20
Execution mode: llap
Reduce Operator Tree:
Merge Join Operator
@@ -267,16 +508,16 @@ STAGE PLANS:
Left Outer Join0 to 1
keys:
0 _col0 (type: string), _col1 (type: string), _col0 (type: string)
- 1 _col2 (type: string), _col1 (type: string), _col0 (type: string)
- outputColumnNames: _col0, _col3
- Statistics: Num rows: 166 Data size: 28884 Basic stats: COMPLETE Column stats: COMPLETE
+ 1 _col4 (type: string), _col2 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col4, _col5, _col9
+ Statistics: Num rows: 166 Data size: 17762 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: _col3 is null (type: boolean)
- Statistics: Num rows: 1 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: CASE WHEN ((_col4 = 0)) THEN (true) WHEN (_col4 is null) THEN (true) WHEN (_col9 is not null) THEN (false) WHEN (_col0 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (false) ELSE (true) END (type: boolean)
+ Statistics: Num rows: 83 Data size: 8881 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 8881 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: _col0 (type: string)
mode: hash
@@ -287,38 +528,113 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
- Reducer 12
+ Reducer 21
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (_col0 = 0) (type: boolean)
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reducer 2
+ Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 23
Execution mode: llap
Reduce Operator Tree:
Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0
- 1
+ 0 _col0 (type: string), _col1 (type: string)
+ 1 _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col2, _col3
+ Statistics: Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col2 (type: string), _col3 (type: string), _col0 (type: string)
+ outputColumnNames: _col2, _col3, _col0
+ Statistics: Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(), count(_col0)
+ keys: _col2 (type: string), _col3 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: bigint), _col3 (type: bigint)
+ Reducer 24
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), count(VALUE._col1)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: bigint), _col3 (type: bigint)
+ Reducer 26
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string), _col0 (type: string)
- sort order: +++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col0 (type: string)
- Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 28
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string), _col1 (type: string)
+ 1 _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col2, _col3
+ Statistics: Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: string), _col2 (type: string), _col3 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ Statistics: Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 29
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), true (type: boolean)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 269 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 269 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: boolean)
Reducer 3
Execution mode: llap
Reduce Operator Tree:
@@ -327,36 +643,79 @@ STAGE PLANS:
Left Outer Join0 to 1
keys:
0 _col0 (type: string), _col1 (type: string), _col0 (type: string)
- 1 _col2 (type: string), _col1 (type: string), _col0 (type: string)
- outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 166 Data size: 43990 Basic stats: COMPLETE Column stats: COMPLETE
+ 1 _col4 (type: string), _col2 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1, _col4, _col5, _col9
+ Statistics: Num rows: 166 Data size: 32868 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: _col3 is null (type: boolean)
- Statistics: Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: CASE WHEN ((_col4 = 0)) THEN (true) WHEN (_col4 is null) THEN (true) WHEN (_col9 is not null) THEN (false) WHEN (_col0 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (false) ELSE (true) END (type: boolean)
+ Statistics: Num rows: 83 Data size: 16434 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
+ Reducer 30
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 269 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col4 (type: string), _col2 (type: string), _col1 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col4 (type: string), _col2 (type: string), _col1 (type: string)
+ Statistics: Num rows: 1 Data size: 269 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col3 (type: boolean)
+ Reducer 32
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 34
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE
Reducer 4
Execution mode: llap
Reduce Operator Tree:
Merge Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -364,35 +723,58 @@ STAGE PLANS:
Reducer 6
Execution mode: llap
Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string), _col1 (type: string)
+ 1 _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col2, _col3
+ Statistics: Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col2 (type: string), _col3 (type: string), _col0 (type: string)
+ outputColumnNames: _col2, _col3, _col0
+ Statistics: Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(), count(_col0)
+ keys: _col2 (type: string), _col3 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: bigint), _col3 (type: bigint)
+ Reducer 7
+ Execution mode: llap
+ Reduce Operator Tree:
Group By Operator
- aggregations: count(VALUE._col0)
+ aggregations: count(VALUE._col0), count(VALUE._col1)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (_col0 = 0) (type: boolean)
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: bigint), _col3 (type: bigint)
Reducer 9
Execution mode: llap
Reduce Operator Tree:
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0
- 1
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string), _col0 (type: string)
- sort order: +++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col0 (type: string)
- Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
Stage: Stage-0
Fetch Operator
@@ -400,8 +782,6 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join MERGEJOIN[67][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
-Warning: Shuffle Join MERGEJOIN[69][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 9' is a cross product
PREHOOK: query: select *
from cv2 where cv2.key in (select key from cv2 c where c.key < '11')
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
index ff658d7..27f32db 100644
--- a/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
@@ -1,4 +1,4 @@
-Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Map 1' is a cross product
+Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Map 1' is a cross product
PREHOOK: query: -- HIVE-12738 -- We are checking if a MapJoin after a GroupBy will work properly.
explain
select *
@@ -18,57 +18,61 @@ POSTHOOK: type: QUERY
Plan optimized by CBO.
Vertex dependency in root stage
-Map 1 <- Map 5 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE)
+Map 1 <- Reducer 4 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 4 <- Map 3 (SIMPLE_EDGE)
+Reducer 6 <- Map 5 (SIMPLE_EDGE)
Stage-0
Fetch Operator
limit:-1
Stage-1
Reducer 2 vectorized, llap
- File Output Operator [FS_36]
- Select Operator [SEL_35] (rows=1 width=178)
+ File Output Operator [FS_37]
+ Select Operator [SEL_36] (rows=250 width=178)
Output:["_col0","_col1"]
<-Map 1 [SIMPLE_EDGE] llap
- SHUFFLE [RS_21]
- Select Operator [SEL_20] (rows=1 width=178)
+ SHUFFLE [RS_23]
+ Select Operator [SEL_22] (rows=250 width=178)
Output:["_col0","_col1"]
- Filter Operator [FIL_19] (rows=1 width=265)
- predicate:_col3 is null
- Map Join Operator [MAPJOIN_28] (rows=1219 width=265)
- Conds:MAPJOIN_27._col0=RS_17._col0(Left Outer),Output:["_col0","_col1","_col3"]
- <-Map 5 [BROADCAST_EDGE] llap
- BROADCAST [RS_17]
+ Filter Operator [FIL_21] (rows=250 width=198)
+ predicate:(not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END)
+ Map Join Operator [MAPJOIN_29] (rows=500 width=198)
+ Conds:MAPJOIN_28._col0=RS_35._col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5"]
+ <-Reducer 6 [BROADCAST_EDGE] vectorized, llap
+ BROADCAST [RS_35]
PartitionCols:_col0
- Select Operator [SEL_12] (rows=500 width=87)
- Output:["_col0"]
- TableScan [TS_11] (rows=500 width=87)
- default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
- <-Map Join Operator [MAPJOIN_27] (rows=500 width=178)
- Conds:(Inner),Output:["_col0","_col1"]
+ Group By Operator [GBY_34] (rows=205 width=91)
+ Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
+ <-Map 5 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_12]
+ PartitionCols:_col0, _col1
+ Group By Operator [GBY_11] (rows=205 width=91)
+ Output:["_col0","_col1"],keys:_col0, true
+ Select Operator [SEL_9] (rows=500 width=87)
+ Output:["_col0"]
+ TableScan [TS_8] (rows=500 width=87)
+ default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
+ <-Map Join Operator [MAPJOIN_28] (rows=500 width=194)
+ Conds:(Inner),Output:["_col0","_col1","_col2","_col3"]
<-Reducer 4 [BROADCAST_EDGE] vectorized, llap
- BROADCAST [RS_34]
- Select Operator [SEL_33] (rows=1 width=8)
- Filter Operator [FIL_32] (rows=1 width=8)
- predicate:(_col0 = 0)
- Group By Operator [GBY_31] (rows=1 width=8)
- Output:["_col0"],aggregations:["count(VALUE._col0)"]
- <-Map 3 [SIMPLE_EDGE] llap
- SHUFFLE [RS_6]
- Group By Operator [GBY_5] (rows=1 width=8)
- Output:["_col0"],aggregations:["count()"]
- Select Operator [SEL_4] (rows=1 width=87)
- Filter Operator [FIL_25] (rows=1 width=87)
- predicate:key is null
- TableScan [TS_2] (rows=500 width=87)
- default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
+ BROADCAST [RS_33]
+ Group By Operator [GBY_32] (rows=1 width=16)
+ Output:["_col0","_col1"],aggregations:["count(VALUE._col0)","count(VALUE._col1)"]
+ <-Map 3 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_5]
+ Group By Operator [GBY_4] (rows=1 width=16)
+ Output:["_col0","_col1"],aggregations:["count()","count(key)"]
+ Select Operator [SEL_3] (rows=500 width=87)
+ Output:["key"]
+ TableScan [TS_2] (rows=500 width=87)
+ default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
<-Select Operator [SEL_1] (rows=500 width=178)
Output:["_col0","_col1"]
TableScan [TS_0] (rows=500 width=178)
default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
-Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Map 1' is a cross product
+Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Map 1' is a cross product
PREHOOK: query: select *
from src
where not key in
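The rewritten filter in the plan above, `not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END`, encodes SQL's three-valued `NOT IN` semantics. A minimal sketch of that logic, using hypothetical names (`countAll` ~ `_col2`, the subquery's `count()`; `countKey` ~ `_col3`, its `count(key)`; `matched` ~ whether `_col5` is non-null; `outerIsNull` ~ whether `_col0` is null) — this is an illustration of the CASE expression, not Hive code:

```java
public class NotInSemantics {
    // Value of the CASE expression; Java null models SQL NULL.
    static Boolean caseValue(long countAll, long countKey,
                             boolean matched, boolean outerIsNull) {
        if (countAll == 0) return false;       // empty subquery: IN is false, NOT IN true
        if (matched) return true;              // an equal non-null key exists
        if (outerIsNull) return null;          // NULL IN (...) is unknown
        if (countKey < countAll) return true;  // subquery has NULLs: IN is unknown;
                                               // CASE yields true so NOT filters the row
        return false;                          // definite non-match
    }

    // The Filter Operator keeps a row only when NOT(caseValue) is true;
    // SQL's NOT NULL is NULL, which also fails the filter.
    static boolean passesNotIn(long countAll, long countKey,
                               boolean matched, boolean outerIsNull) {
        Boolean c = caseValue(countAll, countKey, matched, outerIsNull);
        return c != null && !c;
    }
}
```

Note how a single NULL anywhere in the subquery (`countKey < countAll`) makes every unmatched outer row fail the `NOT IN`, which is exactly the standard-SQL behavior the decorrelated plan must preserve.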
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out b/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
index a075662..22d1bbe 100644
--- a/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
@@ -31,54 +31,64 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 2 (BROADCAST_EDGE)
- Reducer 4 <- Map 1 (BROADCAST_EDGE), Map 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (BROADCAST_EDGE), Reducer 5 (BROADCAST_EDGE)
+ Reducer 5 <- Map 4 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
+ alias: lineitem
+ Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: l_partkey is not null (type: boolean)
+ Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: l_partkey (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
alias: li
Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: ((l_linenumber = 1) and l_partkey is not null and l_orderkey is not null) (type: boolean)
+ predicate: ((l_linenumber = 1) and l_partkey is not null) (type: boolean)
Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
- condition map:
- Left Semi Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col1, _col2
- input vertices:
- 1 Map 2
- Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col1 (type: int)
- sort order: +
- Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col2 (type: int)
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col2 (type: int)
Execution mode: llap
LLAP IO: no inputs
- Map 2
+ Map 4
Map Operator Tree:
TableScan
alias: lineitem
Statistics: Num rows: 100 Data size: 9200 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: ((l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean)
+ predicate: (l_shipmode = 'AIR') (type: boolean)
Statistics: Num rows: 14 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: l_orderkey (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: l_orderkey
+ Statistics: Num rows: 14 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- keys: _col0 (type: int)
+ keys: l_orderkey (type: int)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
@@ -89,27 +99,7 @@ STAGE PLANS:
Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
- Map 3
- Map Operator Tree:
- TableScan
- alias: lineitem
- Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: l_partkey is not null (type: boolean)
- Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: l_partkey (type: int)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: llap
- LLAP IO: no inputs
- Reducer 4
+ Reducer 2
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -121,23 +111,46 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 _col1 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col2, _col4
+ 0 _col0 (type: int)
+ 1 _col1 (type: int)
+ outputColumnNames: _col0, _col1, _col3
input vertices:
- 0 Map 1
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col4 (type: int), _col2 (type: int)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ 1 Map 3
+ Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col3
+ input vertices:
+ 1 Reducer 5
+ Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), _col3 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Stage: Stage-0
Fetch Operator
@@ -192,65 +205,74 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 2 (BROADCAST_EDGE)
- Reducer 4 <- Map 1 (BROADCAST_EDGE), Map 3 (SIMPLE_EDGE)
+ Map 7 <- Reducer 6 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (BROADCAST_EDGE)
+ Reducer 6 <- Map 5 (SIMPLE_EDGE)
+ Reducer 8 <- Map 4 (BROADCAST_EDGE), Map 7 (SIMPLE_EDGE)
+ Reducer 9 <- Reducer 2 (BROADCAST_EDGE), Reducer 8 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
+ alias: lineitem
+ Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: l_partkey is not null (type: boolean)
+ Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: l_partkey (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
alias: li
Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: ((l_linenumber = 1) and l_partkey is not null and l_orderkey is not null) (type: boolean)
+ predicate: ((l_linenumber = 1) and l_partkey is not null) (type: boolean)
Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int)
- outputColumnNames: _col0, _col1, _col2
+ expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int), 1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
- condition map:
- Left Semi Join 0 to 1
- keys:
- 0 _col0 (type: int), 1 (type: int)
- 1 _col0 (type: int), _col1 (type: int)
- outputColumnNames: _col1, _col2
- input vertices:
- 1 Map 2
- Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col1 (type: int)
- sort order: +
- Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col2 (type: int)
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int)
Execution mode: llap
LLAP IO: no inputs
- Map 2
+ Map 4
Map Operator Tree:
TableScan
alias: lineitem
Statistics: Num rows: 100 Data size: 9600 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: ((l_shipmode = 'AIR') and (l_linenumber = 1) and l_orderkey is not null) (type: boolean)
+ predicate: (l_shipmode = 'AIR') (type: boolean)
Statistics: Num rows: 14 Data size: 1344 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: l_orderkey (type: int), 1 (type: int)
+ expressions: l_orderkey (type: int), l_linenumber (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: int), _col1 (type: int)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: int)
- sort order: ++
- Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 1330 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 14 Data size: 1330 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int)
Execution mode: llap
LLAP IO: no inputs
- Map 3
+ Map 5
Map Operator Tree:
TableScan
alias: lineitem
@@ -270,7 +292,41 @@ STAGE PLANS:
Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
- Reducer 4
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: li
+ Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: l_partkey is not null (type: boolean)
+ Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: l_partkey (type: int), l_linenumber (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2
+ input vertices:
+ 0 Reducer 6
+ Statistics: Num rows: 34 Data size: 136 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col2 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -282,19 +338,84 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
+ 0 _col0 (type: int)
+ 1 _col1 (type: int)
+ outputColumnNames: _col0, _col1, _col3, _col4
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 5 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: int), _col4 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: int), _col4 (type: int)
+ Statistics: Num rows: 5 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col3 (type: int)
+ Reducer 6
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 8
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
0 _col1 (type: int)
1 _col0 (type: int)
- outputColumnNames: _col2, _col4
+ outputColumnNames: _col0, _col3
input vertices:
- 0 Map 1
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col4 (type: int), _col2 (type: int)
+ 0 Map 4
+ Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: int), _col3 (type: int)
+ mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 9
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int), _col4 (type: int)
+ 1 _col0 (type: int), _col1 (type: int)
+ outputColumnNames: _col0, _col3
+ input vertices:
+ 0 Reducer 2
+ Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), _col3 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
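The plan change above replaces the earlier Left Semi Join with an ordinary inner join against a Group By-deduplicated subquery side (the `mergepartial` reducers). The two formulations agree because an `IN` predicate matches each outer row at most once per distinct key. A toy sketch of that equivalence over plain integer keys (hypothetical data layout, not Hive operator code):

```java
import java.util.*;
import java.util.stream.*;

public class InAsJoin {
    // Semi-join formulation: keep outer rows whose key appears in subKeys.
    static List<Integer> semiJoin(List<Integer> outerKeys, List<Integer> subKeys) {
        Set<Integer> present = new HashSet<>(subKeys);
        return outerKeys.stream()
                .filter(present::contains)
                .collect(Collectors.toList());
    }

    // Inner-join formulation: deduplicate the subquery side first (the plan's
    // Group By), so the join emits exactly one match per qualifying outer row.
    static List<Integer> joinOnDistinct(List<Integer> outerKeys, List<Integer> subKeys) {
        List<Integer> distinct = subKeys.stream().distinct()
                .collect(Collectors.toList());
        return outerKeys.stream()
                .flatMap(k -> distinct.stream().filter(d -> d.equals(k)).map(d -> k))
                .collect(Collectors.toList());
    }
}
```

Without the dedup step the inner join would duplicate outer rows once per repeated subquery key, which is why the Group By must precede the join in the rewritten plan.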
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java
index 447db8e..a0dfbb1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java
@@ -25,6 +25,7 @@ import org.apache.calcite.plan.RelOptCluster;
import org.apache.calcite.plan.RelTraitSet;
import org.apache.calcite.rel.RelCollation;
import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.RelShuttle;
import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeField;
@@ -37,6 +38,7 @@ import org.apache.calcite.util.mapping.MappingType;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelShuttle;
import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
import com.google.common.collect.ImmutableList;
@@ -188,4 +190,12 @@ public class HiveProject extends Project implements HiveRelNode {
return isSysnthetic;
}
+ //required for HiveRelDecorrelator
+ @Override public RelNode accept(RelShuttle shuttle) {
+ if(shuttle instanceof HiveRelShuttle) {
+ return ((HiveRelShuttle)shuttle).visit(this);
+ }
+ return shuttle.visit(this);
+ }
+
}
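The `accept` override added to HiveProject is a standard double-dispatch refinement: if the visiting shuttle understands Hive-specific operators, dispatch to its more specific `visit` overload; otherwise fall back to the generic one. A minimal self-contained sketch of the pattern, with hypothetical interface and class names standing in for the Calcite/Hive types:

```java
// Generic visitor, analogous to Calcite's RelShuttle.
interface Shuttle {
    Node visit(Node node);
}

// Extended visitor that knows engine-specific nodes,
// analogous to HiveRelShuttle.
interface HiveShuttle extends Shuttle {
    Node visit(ProjectNode project);
}

class Node {
    Node accept(Shuttle shuttle) {
        return shuttle.visit(this);
    }
}

class ProjectNode extends Node {
    // Without this override, a decorrelator written against HiveShuttle
    // would only ever see the generic Node overload.
    @Override
    Node accept(Shuttle shuttle) {
        if (shuttle instanceof HiveShuttle) {
            return ((HiveShuttle) shuttle).visit(this);
        }
        return shuttle.visit(this);
    }
}
```

Since Java overload resolution is static, the cast to the richer interface is what routes the call to `visit(ProjectNode)`; this is why each Hive operator the decorrelator handles needs its own `accept` override.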
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java
index 6ed2914..ed20da3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java
@@ -24,8 +24,10 @@ import org.apache.calcite.plan.RelTraitSet;
import org.apache.calcite.rel.RelCollation;
import org.apache.calcite.rel.RelCollationTraitDef;
import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.RelShuttle;
import org.apache.calcite.rel.core.Sort;
import org.apache.calcite.rex.RexNode;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelShuttle;
import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
import com.google.common.collect.ImmutableMap;
@@ -107,4 +109,12 @@ public class HiveSortLimit extends Sort implements HiveRelNode {
this.ruleCreated = ruleCreated;
}
+ //required for HiveRelDecorrelator
+ public RelNode accept(RelShuttle shuttle) {
+ if (shuttle instanceof HiveRelShuttle) {
+ return ((HiveRelShuttle)shuttle).visit(this);
+ }
+ return shuttle.visit(this);
+ }
+
}
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientpositive/subquery_notin_having.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/subquery_notin_having.q.out b/ql/src/test/results/clientpositive/subquery_notin_having.q.out
index 804f411..9f72cc9 100644
--- a/ql/src/test/results/clientpositive/subquery_notin_having.q.out
+++ b/ql/src/test/results/clientpositive/subquery_notin_having.q.out
@@ -1,4 +1,4 @@
-Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[23][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product
PREHOOK: query: -- non agg, non corr
explain
@@ -24,8 +24,9 @@ POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-2 depends on stages: Stage-1, Stage-4
- Stage-3 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-2, Stage-5
Stage-4 is a root stage
+ Stage-5 is a root stage
Stage-0 depends on stages: Stage-3
STAGE PLANS:
@@ -76,7 +77,8 @@ STAGE PLANS:
TableScan
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
Reduce Operator Tree:
Join Operator
condition map:
@@ -84,8 +86,8 @@ STAGE PLANS:
keys:
0
1
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 4906 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 250 Data size: 6906 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -101,23 +103,15 @@ STAGE PLANS:
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 4906 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
+ Statistics: Num rows: 250 Data size: 6906 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
TableScan
- alias: s1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (key > '12') (type: boolean)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: boolean)
Reduce Operator Tree:
Join Operator
condition map:
@@ -125,18 +119,18 @@ STAGE PLANS:
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 275 Data size: 5396 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col0, _col1, _col2, _col3, _col5
+ Statistics: Num rows: 275 Data size: 7596 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: _col3 is null (type: boolean)
- Statistics: Num rows: 137 Data size: 2688 Basic stats: COMPLETE Column stats: NONE
+ predicate: (not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END) (type: boolean)
+ Statistics: Num rows: 138 Data size: 3811 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 137 Data size: 2688 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 138 Data size: 3811 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 137 Data size: 2688 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 138 Data size: 3811 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -147,38 +141,67 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: s1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key > '12') (type: boolean)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(), count(key)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), count(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: s1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key > '12') (type: boolean)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count()
+ keys: _col0 (type: string), true (type: boolean)
mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint)
+ key expressions: _col0 (type: string), _col1 (type: boolean)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Group By Operator
- aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: boolean)
mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (_col0 = 0) (type: boolean)
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Fetch Operator
@@ -186,7 +209,6 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join JOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product
PREHOOK: query: -- non agg, corr
explain
select b.p_mfgr, min(p_retailprice)
@@ -211,11 +233,20 @@ having b.p_mfgr not in
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1, Stage-5
- Stage-3 depends on stages: Stage-2, Stage-6
+ Stage-2 depends on stages: Stage-1, Stage-6
+ Stage-3 depends on stages: Stage-2, Stage-12
Stage-4 is a root stage
- Stage-5 depends on stages: Stage-4
- Stage-6 is a root stage
+ Stage-5 depends on stages: Stage-4, Stage-8
+ Stage-6 depends on stages: Stage-5
+ Stage-7 is a root stage
+ Stage-8 depends on stages: Stage-7
+ Stage-9 is a root stage
+ Stage-10 depends on stages: Stage-9, Stage-14
+ Stage-11 depends on stages: Stage-10
+ Stage-12 depends on stages: Stage-11, Stage-15
+ Stage-13 is a root stage
+ Stage-14 depends on stages: Stage-13
+ Stage-15 is a root stage
Stage-0 depends on stages: Stage-3
STAGE PLANS:
@@ -260,22 +291,27 @@ STAGE PLANS:
Map Operator Tree:
TableScan
Reduce Output Operator
- sort order:
+ key expressions: _col1 (type: double)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: double)
Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col1 (type: double)
+ value expressions: _col0 (type: string)
TableScan
Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ key expressions: _col0 (type: double)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: double)
+ Statistics: Num rows: 3 Data size: 399 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
Reduce Operator Tree:
Join Operator
condition map:
- Inner Join 0 to 1
+ Left Outer Join0 to 1
keys:
- 0
- 1
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 13 Data size: 1898 Basic stats: COMPLETE Column stats: NONE
+ 0 _col1 (type: double)
+ 1 _col0 (type: double)
+ outputColumnNames: _col0, _col1, _col3, _col4
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -291,32 +327,34 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: double)
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: double)
- Statistics: Num rows: 13 Data size: 1898 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: bigint), _col4 (type: bigint)
TableScan
Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: double)
+ key expressions: _col3 (type: string), _col1 (type: double)
sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: double)
- Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE
+ Map-reduce partition columns: _col3 (type: string), _col1 (type: double)
+ Statistics: Num rows: 6 Data size: 798 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: boolean)
Reduce Operator Tree:
Join Operator
condition map:
Left Outer Join0 to 1
keys:
0 _col0 (type: string), _col1 (type: double)
- 1 _col0 (type: string), _col1 (type: double)
- outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 14 Data size: 2087 Basic stats: COMPLETE Column stats: NONE
+ 1 _col3 (type: string), _col1 (type: double)
+ outputColumnNames: _col0, _col1, _col3, _col4, _col7
+ Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: _col3 is null (type: boolean)
- Statistics: Num rows: 7 Data size: 1043 Basic stats: COMPLETE Column stats: NONE
+ predicate: CASE WHEN ((_col3 = 0)) THEN (true) WHEN (_col3 is null) THEN (true) WHEN (_col7 is not null) THEN (false) WHEN (_col0 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (false) ELSE (true) END (type: boolean)
+ Statistics: Num rows: 7 Data size: 888 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: double)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 7 Data size: 1043 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 888 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 7 Data size: 1043 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 888 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -352,41 +390,123 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((_col2 - _col1) > 600.0) and (_col0 is null or _col1 is null)) (type: boolean)
- Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((_col2 - _col1) > 600.0) (type: boolean)
+ Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE
Select Operator
- Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ expressions: _col0 (type: string), _col1 (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-5
Map Reduce
Map Operator Tree:
TableScan
Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
+ key expressions: _col1 (type: double)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: double)
+ Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: double)
+ Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: double)
+ 1 _col0 (type: double)
+ outputColumnNames: _col0, _col3
+ Statistics: Num rows: 6 Data size: 798 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col3 (type: double), _col0 (type: string)
+ outputColumnNames: _col3, _col0
+ Statistics: Num rows: 6 Data size: 798 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(), count(_col0)
+ keys: _col3 (type: double)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 6 Data size: 798 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: double)
+ Statistics: Num rows: 6 Data size: 798 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
Reduce Operator Tree:
Group By Operator
- aggregations: count(VALUE._col0)
+ aggregations: count(VALUE._col0), count(VALUE._col1)
+ keys: KEY._col0 (type: double)
mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (_col0 = 0) (type: boolean)
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 3 Data size: 399 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-7
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ expressions: p_mfgr (type: string), p_retailprice (type: double)
+ outputColumnNames: p_mfgr, p_retailprice
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(p_retailprice)
+ keys: p_mfgr (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: double)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: double)
+ outputColumnNames: _col1
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: double)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -394,7 +514,29 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- Stage: Stage-6
+ Stage: Stage-8
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: double)
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: double)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-9
Map Reduce
Map Operator Tree:
TableScan
@@ -437,45 +579,244 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
+ Stage: Stage-10
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col1 (type: double)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: double)
+ Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: double)
+ Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: double)
+ 1 _col0 (type: double)
+ outputColumnNames: _col0, _col3
+ Statistics: Num rows: 6 Data size: 798 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col3 (type: double)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 798 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-Warning: Shuffle Join JOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product
-PREHOOK: query: select b.p_mfgr, min(p_retailprice)
-from part b
-group by b.p_mfgr
-having b.p_mfgr not in
- (select p_mfgr
- from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a
- where min(p_retailprice) = l and r - l > 600
- )
-PREHOOK: type: QUERY
-PREHOOK: Input: default@part
-#### A masked pattern was here ####
-POSTHOOK: query: select b.p_mfgr, min(p_retailprice)
-from part b
-group by b.p_mfgr
-having b.p_mfgr not in
- (select p_mfgr
- from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a
- where min(p_retailprice) = l and r - l > 600
- )
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@part
-#### A masked pattern was here ####
-Manufacturer#1 1173.15
-Manufacturer#2 1690.68
-Warning: Shuffle Join JOIN[31][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product
-PREHOOK: query: -- agg, non corr
-explain
-select b.p_mfgr, min(p_retailprice)
-from part b
-group by b.p_mfgr
-having b.p_mfgr not in
- (select p_mfgr
+ Stage: Stage-11
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: double)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: double)
+ Statistics: Num rows: 6 Data size: 798 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: double)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 399 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: double), true (type: boolean)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 3 Data size: 399 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-12
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 3 Data size: 399 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: double), _col2 (type: boolean)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 6 Data size: 798 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-13
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_mfgr (type: string), p_retailprice (type: double)
+ outputColumnNames: p_mfgr, p_retailprice
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(p_retailprice)
+ keys: p_mfgr (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: double)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: double)
+ outputColumnNames: _col1
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: double)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-14
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: double)
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: double)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-15
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_mfgr (type: string)
+ outputColumnNames: p_mfgr
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: p_mfgr (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select b.p_mfgr, min(p_retailprice)
+from part b
+group by b.p_mfgr
+having b.p_mfgr not in
+ (select p_mfgr
+ from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a
+ where min(p_retailprice) = l and r - l > 600
+ )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select b.p_mfgr, min(p_retailprice)
+from part b
+group by b.p_mfgr
+having b.p_mfgr not in
+ (select p_mfgr
+ from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a
+ where min(p_retailprice) = l and r - l > 600
+ )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+Manufacturer#1 1173.15
+Manufacturer#2 1690.68
+Warning: Shuffle Join JOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: -- agg, non corr
+explain
+select b.p_mfgr, min(p_retailprice)
+from part b
+group by b.p_mfgr
+having b.p_mfgr not in
+ (select p_mfgr
from part a
group by p_mfgr
having max(p_retailprice) - min(p_retailprice) > 600
@@ -551,6 +892,7 @@ STAGE PLANS:
Reduce Output Operator
sort order:
Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
Reduce Operator Tree:
Join Operator
condition map:
@@ -558,7 +900,7 @@ STAGE PLANS:
keys:
0
1
- outputColumnNames: _col0, _col1
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 13 Data size: 1898 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
@@ -576,13 +918,14 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 13 Data size: 1898 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: double)
+ value expressions: _col1 (type: double), _col2 (type: bigint), _col3 (type: bigint)
TableScan
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: boolean)
Reduce Operator Tree:
Join Operator
condition map:
@@ -590,10 +933,10 @@ STAGE PLANS:
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col3
+ outputColumnNames: _col0, _col1, _col2, _col3, _col5
Statistics: Num rows: 14 Data size: 2087 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: _col3 is null (type: boolean)
+ predicate: (not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END) (type: boolean)
Statistics: Num rows: 7 Data size: 1043 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: double)
@@ -613,52 +956,47 @@ STAGE PLANS:
TableScan
alias: a
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: p_mfgr is null (type: boolean)
- Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: p_retailprice (type: double)
- outputColumnNames: _col1
- Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: max(_col1), min(_col1)
- keys: null (type: string)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: double), _col2 (type: double)
+ Select Operator
+ expressions: p_mfgr (type: string), p_retailprice (type: double)
+ outputColumnNames: p_mfgr, p_retailprice
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: max(p_retailprice), min(p_retailprice)
+ keys: p_mfgr (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: double), _col2 (type: double)
Reduce Operator Tree:
Group By Operator
aggregations: max(VALUE._col0), min(VALUE._col1)
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: double), _col2 (type: double)
- outputColumnNames: _col1, _col2
- Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((_col1 - _col2) > 600.0) (type: boolean)
- Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col1 - _col2) > 600.0) (type: boolean)
+ Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(), count(_col0)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-5
Map Reduce
@@ -667,24 +1005,19 @@ STAGE PLANS:
Reduce Output Operator
sort order:
Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
Reduce Operator Tree:
Group By Operator
- aggregations: count(VALUE._col0)
+ aggregations: count(VALUE._col0), count(VALUE._col1)
mode: mergepartial
- outputColumnNames: _col0
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (_col0 = 0) (type: boolean)
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-6
Map Reduce
@@ -703,8 +1036,8 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
+ key expressions: _col0 (type: string), true (type: boolean)
+ sort order: ++
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: double), _col2 (type: double)
@@ -722,12 +1055,17 @@ STAGE PLANS:
expressions: _col0 (type: string)
outputColumnNames: _col0
Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Group By Operator
+ keys: _col0 (type: string), true (type: boolean)
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Fetch Operator
@@ -735,7 +1073,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join JOIN[31][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product
PREHOOK: query: select b.p_mfgr, min(p_retailprice)
from part b
group by b.p_mfgr
@@ -762,3 +1100,675 @@ POSTHOOK: Input: default@part
#### A masked pattern was here ####
Manufacturer#1 1173.15
Manufacturer#2 1690.68
+PREHOOK: query: --nullability tests
+CREATE TABLE t1 (c1 INT, c2 CHAR(100))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: --nullability tests
+CREATE TABLE t1 (c1 INT, c2 CHAR(100))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: INSERT INTO t1 VALUES (null,null), (1,''), (2,'abcde'), (100,'abcdefghij')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t1
+POSTHOOK: query: INSERT INTO t1 VALUES (null,null), (1,''), (2,'abcde'), (100,'abcdefghij')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t1
+POSTHOOK: Lineage: t1.c1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: t1.c2 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: CREATE TABLE t2 (c1 INT)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t2
+POSTHOOK: query: CREATE TABLE t2 (c1 INT)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t2
+PREHOOK: query: INSERT INTO t2 VALUES (null), (2), (100)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t2
+POSTHOOK: query: INSERT INTO t2 VALUES (null), (2), (100)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t2
+POSTHOOK: Lineage: t2.c1 EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: explain SELECT c1 FROM t1 group by c1 having c1 NOT IN (SELECT c1 FROM t2)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain SELECT c1 FROM t1 group by c1 having c1 NOT IN (SELECT c1 FROM t2)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-4
+ Stage-3 depends on stages: Stage-2, Stage-5
+ Stage-4 is a root stage
+ Stage-5 is a root stage
+ Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c1 (type: int)
+ outputColumnNames: c1
+ Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: c1 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 156 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 2 Data size: 156 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: boolean)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col4
+ Statistics: Num rows: 2 Data size: 209 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (not CASE WHEN ((_col1 = 0)) THEN (false) WHEN (_col4 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col2 < _col1)) THEN (true) ELSE (false) END) (type: boolean)
+ Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: t2
+ Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c1 (type: int)
+ outputColumnNames: c1
+ Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(), count(c1)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), count(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: t2
+ Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int), true (type: boolean)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: boolean)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean)
+ Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: boolean)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: SELECT c1 FROM t1 group by c1 having c1 NOT IN (SELECT c1 FROM t2)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT c1 FROM t1 group by c1 having c1 NOT IN (SELECT c1 FROM t2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
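The query above returns no rows, which is the correct SQL semantics: t2 contains a NULL, so `c1 NOT IN (NULL, 2, 100)` can never be TRUE for any group. The plan's CASE-based filter (counting rows and non-null values) exists precisely to reproduce this three-valued logic. A minimal Python sketch of those semantics, with assumed data matching the test tables (this is an illustration, not Hive code):

```python
def not_in(x, values):
    """SQL three-valued NOT IN: returns True, False, or None (UNKNOWN)."""
    if x is None:
        # NULL NOT IN (...) is TRUE only for an empty list, else UNKNOWN.
        return True if not values else None
    unknown = False
    for v in values:
        if v is None:
            unknown = True      # comparison against NULL is UNKNOWN
        elif v == x:
            return False        # definite match: NOT IN is FALSE
    return None if unknown else True

t1_groups = [None, 1, 2, 100]   # distinct c1 values of t1
t2_values = [None, 2, 100]      # c1 values of t2 (contains a NULL)

# A row survives HAVING only when the predicate is TRUE, not UNKNOWN.
result = [x for x in t1_groups if not_in(x, t2_values) is True]
print(result)  # -> []
```

Every group evaluates to FALSE (a match exists) or UNKNOWN (the NULL in t2), so the filter drops them all and the result set is empty.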
+PREHOOK: query: explain SELECT c1 FROM t1 group by c1 having c1 NOT IN (SELECT c1 FROM t2 where t1.c1=t2.c1)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain SELECT c1 FROM t1 group by c1 having c1 NOT IN (SELECT c1 FROM t2 where t1.c1=t2.c1)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-5
+ Stage-3 depends on stages: Stage-2, Stage-9
+ Stage-6 is a root stage
+ Stage-4 depends on stages: Stage-6
+ Stage-5 depends on stages: Stage-4
+ Stage-10 is a root stage
+ Stage-7 depends on stages: Stage-10
+ Stage-8 depends on stages: Stage-7
+ Stage-9 depends on stages: Stage-8, Stage-11
+ Stage-11 is a root stage
+ Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c1 (type: int)
+ outputColumnNames: c1
+ Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: c1 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 156 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 2 Data size: 156 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col2, _col3
+ Statistics: Num rows: 2 Data size: 171 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col0 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col0 (type: int)
+ Statistics: Num rows: 2 Data size: 171 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: bigint), _col3 (type: bigint)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col3 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col3 (type: int), _col1 (type: int)
+ Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: boolean)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: int), _col0 (type: int)
+ 1 _col3 (type: int), _col1 (type: int)
+ outputColumnNames: _col0, _col2, _col3, _col6
+ Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: CASE WHEN ((_col2 = 0)) THEN (true) WHEN (_col2 is null) THEN (true) WHEN (_col6 is not null) THEN (false) WHEN (_col0 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (false) ELSE (true) END (type: boolean)
+ Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c1 (type: int)
+ outputColumnNames: c1
+ Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: c1 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 156 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int)
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: t2
+ Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(), count(_col0)
+ keys: _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), count(VALUE._col1)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-10
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c1 (type: int)
+ outputColumnNames: c1
+ Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: c1 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 156 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int)
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-7
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: t2
+ Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-8
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), true (type: boolean)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-9
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: boolean)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-11
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c1 (type: int)
+ outputColumnNames: c1
+ Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: c1 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 156 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int)
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT c1 FROM t1 group by c1 having c1 NOT IN (SELECT c1 FROM t2 where t1.c1=t2.c1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT c1 FROM t1 group by c1 having c1 NOT IN (SELECT c1 FROM t2 where t1.c1=t2.c1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+NULL
+1
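Unlike the uncorrelated case, this query keeps NULL and 1. The correlation predicate `t1.c1 = t2.c1` already removes every NULL (and every non-matching row) from the subquery, so for those groups NOT IN is evaluated over an empty set, which is TRUE. A short Python sketch over the same assumed data (an illustration of the semantics, not Hive's implementation):

```python
t1_groups = [None, 1, 2, 100]   # distinct c1 values of t1
t2_values = [None, 2, 100]      # c1 values of t2

def keeps(c1):
    # Correlated subquery: SELECT c1 FROM t2 WHERE t1.c1 = t2.c1.
    # NULL = anything is UNKNOWN, so NULL rows never survive the WHERE.
    sub = [v for v in t2_values if c1 is not None and v == c1]
    # NOT IN over an empty set is TRUE; over a set containing c1, FALSE.
    return len(sub) == 0

result = [c1 for c1 in t1_groups if keeps(c1)]
print(result)   # -> [None, 1]
```

This matches the golden output above (NULL and 1) and is why the decorrelated plan joins on the correlation key before applying the count-based CASE predicate.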
+PREHOOK: query: DROP TABLE t1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@t1
+POSTHOOK: query: DROP TABLE t1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@t1
+PREHOOK: query: DROP TABLE t2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t2
+PREHOOK: Output: default@t2
+POSTHOOK: query: DROP TABLE t2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t2
+POSTHOOK: Output: default@t2
[06/21] hive git commit: HIVE-15192 : Use Calcite to de-correlate and plan subqueries (Vineet Garg via Ashutosh Chauhan)
Posted by ha...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out b/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out
index 012c3eb..14544c5 100644
--- a/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out
@@ -31,33 +31,71 @@ STAGE PLANS:
Stage: Stage-2
Spark
Edges:
- Reducer 4 <- Map 3 (GROUP, 2)
+ Reducer 5 <- Map 4 (GROUP, 2)
#### A masked pattern was here ####
Vertices:
- Map 2
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: li
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((l_linenumber = 1) and l_partkey is not null) (type: boolean)
+ Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col1 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 4
Map Operator Tree:
TableScan
alias: lineitem
Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean)
+ predicate: (l_shipmode = 'AIR') (type: boolean)
Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: l_orderkey (type: int)
- outputColumnNames: _col0
+ outputColumnNames: l_orderkey
Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- keys: _col0 (type: int)
+ keys: l_orderkey (type: int)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+ Reducer 5
+ Execution mode: vectorized
Local Work:
Map Reduce Local Work
- Map 3
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP, 2)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
Map Operator Tree:
TableScan
alias: lineitem
@@ -75,7 +113,7 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
- Reducer 4
+ Reducer 2
Execution mode: vectorized
Local Work:
Map Reduce Local Work
@@ -85,60 +123,37 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
keys:
- 0 _col1 (type: int)
- 1 _col0 (type: int)
-
- Stage: Stage-1
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: li
- Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((l_linenumber = 1) and l_partkey is not null and l_orderkey is not null) (type: boolean)
- Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+ 0 _col0 (type: int)
+ 1 _col1 (type: int)
+ outputColumnNames: _col0, _col1, _col3
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col3
+ input vertices:
+ 1 Reducer 5
+ Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Left Semi Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col1, _col2
- input vertices:
- 1 Map 2
- Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col1 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col2, _col4
- input vertices:
- 1 Reducer 4
- Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col4 (type: int), _col2 (type: int)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Local Work:
- Map Reduce Local Work
+ expressions: _col0 (type: int), _col3 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -185,43 +200,149 @@ where li.l_linenumber = 1 and
li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
+ Stage-3 is a root stage
+ Stage-2 depends on stages: Stage-3, Stage-4
Stage-1 depends on stages: Stage-2
+ Stage-4 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-2
+ Stage: Stage-3
Spark
- Edges:
- Reducer 4 <- Map 3 (GROUP, 2)
#### A masked pattern was here ####
Vertices:
- Map 2
+ Map 4
Map Operator Tree:
TableScan
alias: lineitem
Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((l_shipmode = 'AIR') and (l_linenumber = 1) and l_orderkey is not null) (type: boolean)
- Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
+ predicate: (l_shipmode = 'AIR') (type: boolean)
+ Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: l_orderkey (type: int), 1 (type: int)
+ expressions: l_orderkey (type: int), l_linenumber (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: int), _col1 (type: int)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: int), 1 (type: int)
- 1 _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
Local Work:
Map Reduce Local Work
+
+ Stage: Stage-2
+ Spark
+ Edges:
+ Reducer 8 <- Map 7 (GROUP, 2)
+ Reducer 9 <- Reducer 8 (GROUP, 2)
+#### A masked pattern was here ####
+ Vertices:
Map 3
Map Operator Tree:
TableScan
+ alias: li
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((l_linenumber = 1) and l_partkey is not null) (type: boolean)
+ Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int), 1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col1 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: li
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: l_partkey is not null (type: boolean)
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: l_partkey (type: int), l_linenumber (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2
+ input vertices:
+ 0 Reducer 6
+ Statistics: Num rows: 110 Data size: 13198 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col2 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 110 Data size: 13198 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 110 Data size: 13198 Basic stats: COMPLETE Column stats: NONE
+ Local Work:
+ Map Reduce Local Work
+ Reducer 8
+ Execution mode: vectorized
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 55 Data size: 6599 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col3
+ input vertices:
+ 0 Map 4
+ Statistics: Num rows: 60 Data size: 7258 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int), _col3 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 60 Data size: 7258 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 60 Data size: 7258 Basic stats: COMPLETE Column stats: NONE
+ Reducer 9
+ Execution mode: vectorized
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 30 Data size: 3629 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int), _col4 (type: int)
+ 1 _col0 (type: int), _col1 (type: int)
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP, 2)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
alias: lineitem
Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
Filter Operator
@@ -237,7 +358,7 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
- Reducer 4
+ Reducer 2
Execution mode: vectorized
Local Work:
Map Reduce Local Work
@@ -247,60 +368,76 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
keys:
- 0 _col1 (type: int)
- 1 _col0 (type: int)
+ 0 _col0 (type: int)
+ 1 _col1 (type: int)
+ outputColumnNames: _col0, _col1, _col3, _col4
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int), _col4 (type: int)
+ 1 _col0 (type: int), _col1 (type: int)
+ outputColumnNames: _col0, _col3
+ input vertices:
+ 1 Reducer 9
+ Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col3 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Stage: Stage-1
+ Stage: Stage-4
Spark
+ Edges:
+ Reducer 6 <- Map 5 (GROUP, 2)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 5
Map Operator Tree:
TableScan
- alias: li
+ alias: lineitem
Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((l_linenumber = 1) and l_partkey is not null and l_orderkey is not null) (type: boolean)
- Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Left Semi Join 0 to 1
- keys:
- 0 _col0 (type: int), 1 (type: int)
- 1 _col0 (type: int), _col1 (type: int)
- outputColumnNames: _col1, _col2
- input vertices:
- 1 Map 2
- Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col1 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col2, _col4
- input vertices:
- 1 Reducer 4
- Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col4 (type: int), _col2 (type: int)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ predicate: l_partkey is not null (type: boolean)
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: l_partkey (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+ Reducer 6
+ Execution mode: vectorized
Local Work:
Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
Stage: Stage-0
Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientpositive/subq_where_serialization.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/subq_where_serialization.q.out b/ql/src/test/results/clientpositive/subq_where_serialization.q.out
index f689651..5deb9d9 100644
--- a/ql/src/test/results/clientpositive/subq_where_serialization.q.out
+++ b/ql/src/test/results/clientpositive/subq_where_serialization.q.out
@@ -4,11 +4,9 @@ POSTHOOK: query: explain select src.key from src where src.key in ( select disti
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-2 is a root stage
- Stage-4 depends on stages: Stage-2 , consists of Stage-5, Stage-1
- Stage-5 has a backup stage: Stage-1
- Stage-3 depends on stages: Stage-5
- Stage-1
- Stage-0 depends on stages: Stage-3, Stage-1
+ Stage-4 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-4
+ Stage-0 depends on stages: Stage-3
STAGE PLANS:
Stage: Stage-2
@@ -17,8 +15,9 @@ STAGE PLANS:
TableScan
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
keys: key (type: string)
@@ -38,9 +37,9 @@ STAGE PLANS:
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Group By Operator
keys: _col0 (type: string)
- mode: hash
+ mode: complete
outputColumnNames: _col0
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -49,93 +48,46 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-4
- Conditional Operator
-
- Stage: Stage-5
Map Reduce Local Work
Alias -> Map Local Tables:
- $INTNAME
+ $hdt$_0:src
Fetch Operator
limit: -1
Alias -> Map Local Operator Tree:
- $INTNAME
- TableScan
- HashTable Sink Operator
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
-
- Stage: Stage-3
- Map Reduce
- Map Operator Tree:
+ $hdt$_0:src
TableScan
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Left Semi Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Local Work:
- Map Reduce Local Work
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
- Stage: Stage-1
+ Stage: Stage-3
Map Reduce
Map Operator Tree:
TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- TableScan
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Operator Tree:
- Join Operator
- condition map:
- Left Semi Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Local Work:
+ Map Reduce Local Work
Stage: Stage-0
Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientpositive/subquery_exists.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/subquery_exists.q.out b/ql/src/test/results/clientpositive/subquery_exists.q.out
index 86f9089..1019e7a 100644
--- a/ql/src/test/results/clientpositive/subquery_exists.q.out
+++ b/ql/src/test/results/clientpositive/subquery_exists.q.out
@@ -25,60 +25,146 @@ where exists
)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-4 is a root stage
+ Stage-2 depends on stages: Stage-4
+ Stage-3 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-3
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-1
+ Stage: Stage-4
Map Reduce
Map Operator Tree:
TableScan
alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((value > 'val_9') and key is not null) (type: boolean)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: key (type: string), value (type: string)
+ mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
TableScan
alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((value > 'val_9') and key is not null) (type: boolean)
+ predicate: (value > 'val_9') (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string), _col1 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col0 (type: string), _col1 (type: string)
1 _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col2, _col3
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col2 (type: string), _col3 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string), _col1 (type: string)
+ 1 _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -222,53 +308,136 @@ where exists
)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-4 is a root stage
+ Stage-2 depends on stages: Stage-4
+ Stage-3 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-3
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-1
+ Stage: Stage-4
Map Reduce
Map Operator Tree:
TableScan
alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: value is not null (type: boolean)
+ Select Operator
+ expressions: value (type: string)
+ outputColumnNames: value
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ Group By Operator
+ keys: value (type: string)
+ mode: hash
+ outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col1 (type: string)
+ key expressions: _col0 (type: string)
sort order: +
- Map-reduce partition columns: _col1 (type: string)
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string)
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
TableScan
alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: value is not null (type: boolean)
+ Select Operator
+ expressions: value (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: value (type: string)
- outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col1 (type: string)
1 _col0 (type: string)
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientpositive/subquery_exists_having.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/subquery_exists_having.q.out b/ql/src/test/results/clientpositive/subquery_exists_having.q.out
index 8861c82..e54e18f 100644
--- a/ql/src/test/results/clientpositive/subquery_exists_having.q.out
+++ b/ql/src/test/results/clientpositive/subquery_exists_having.q.out
@@ -22,7 +22,10 @@ having exists
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-1, Stage-4
+ Stage-5 is a root stage
+ Stage-3 depends on stages: Stage-5
+ Stage-4 depends on stages: Stage-3
Stage-0 depends on stages: Stage-2
STAGE PLANS:
@@ -32,8 +35,9 @@ STAGE PLANS:
TableScan
alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
@@ -72,41 +76,132 @@ STAGE PLANS:
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((value > 'val_9') and key is not null) (type: boolean)
+ predicate: (value > 'val_9') (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col2
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col2 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Fetch Operator
@@ -175,8 +270,9 @@ STAGE PLANS:
TableScan
alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
@@ -194,36 +290,48 @@ STAGE PLANS:
alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((value > 'val_9') and key is not null) (type: boolean)
+ predicate: (value > 'val_9') (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Demux Operator
- Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1166 Data size: 12387 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 583 Data size: 6193 Basic stats: COMPLETE Column stats: NONE
Mux Operator
- Statistics: Num rows: 999 Data size: 10612 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 584 Data size: 6193 Basic stats: COMPLETE Column stats: NONE
Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
@@ -236,23 +344,84 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 583 Data size: 6193 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 291 Data size: 3091 Basic stats: COMPLETE Column stats: NONE
+ Mux Operator
+ Statistics: Num rows: 1457 Data size: 15478 Basic stats: COMPLETE Column stats: NONE
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col2
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Mux Operator
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Group By Operator
+ keys: _col2 (type: string)
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Mux Operator
+ Statistics: Num rows: 584 Data size: 6193 Basic stats: COMPLETE Column stats: NONE
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Mux Operator
- Statistics: Num rows: 999 Data size: 10612 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1457 Data size: 15478 Basic stats: COMPLETE Column stats: NONE
Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col0, _col1
+ outputColumnNames: _col2
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- File Output Operator
- compressed: false
+ Mux Operator
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Group By Operator
+ keys: _col2 (type: string)
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Mux Operator
+ Statistics: Num rows: 584 Data size: 6193 Basic stats: COMPLETE Column stats: NONE
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
[13/21] hive git commit: HIVE-15192 : Use Calcite to de-correlate and plan subqueries (Vineet Garg via Ashutosh Chauhan)
Posted by ha...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientpositive/llap/subquery_nested_subquery.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/subquery_nested_subquery.q.out b/ql/src/test/results/clientpositive/llap/subquery_nested_subquery.q.out
new file mode 100644
index 0000000..332425f
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/subquery_nested_subquery.q.out
@@ -0,0 +1,38 @@
+PREHOOK: query: select *
+from part x
+where x.p_name in (select y.p_name from part y where exists (select z.p_name from part z where y.p_name = z.p_name))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from part x
+where x.p_name in (select y.p_name from part y where exists (select z.p_name from part z where y.p_name = z.p_name))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir
+121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h
+121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h
+90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl
+85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull
+17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the
+49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick
+42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl
+112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car
+40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s
+144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about
+110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously
+155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra
+105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ
+48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i
+191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle
+86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully
+15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu
+45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful
+146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref
+65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr
+132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even
+195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de
+17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve
+33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful
+78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith
[05/21] hive git commit: HIVE-15192 : Use Calcite to de-correlate and plan subqueries (Vineet Garg via Ashutosh Chauhan)
Posted by ha...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientpositive/subquery_in_having.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/subquery_in_having.q.out b/ql/src/test/results/clientpositive/subquery_in_having.q.out
index 854aa36..e277c59 100644
--- a/ql/src/test/results/clientpositive/subquery_in_having.q.out
+++ b/ql/src/test/results/clientpositive/subquery_in_having.q.out
@@ -60,8 +60,9 @@ having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.k
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1, Stage-3
+ Stage-2 depends on stages: Stage-1, Stage-4
Stage-3 is a root stage
+ Stage-4 depends on stages: Stage-3
Stage-0 depends on stages: Stage-2
STAGE PLANS:
@@ -94,15 +95,12 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: _col1 is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-2
Map Reduce
@@ -119,11 +117,11 @@ STAGE PLANS:
key expressions: _col0 (type: bigint)
sort order: +
Map-reduce partition columns: _col0 (type: bigint)
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col1 (type: bigint)
1 _col0 (type: bigint)
@@ -169,24 +167,39 @@ STAGE PLANS:
expressions: _col1 (type: bigint)
outputColumnNames: _col1
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: _col1 is not null (type: boolean)
+ Group By Operator
+ keys: _col1 (type: bigint)
+ mode: hash
+ outputColumnNames: _col0
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: bigint)
- outputColumnNames: _col0
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: bigint)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Fetch Operator
@@ -269,8 +282,11 @@ having count(*) in (select count(*) from src s1 where s1.key > '9' and s1.value
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1, Stage-3
- Stage-3 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-5
+ Stage-6 is a root stage
+ Stage-3 depends on stages: Stage-6
+ Stage-4 depends on stages: Stage-3
+ Stage-5 depends on stages: Stage-4
Stage-0 depends on stages: Stage-2
STAGE PLANS:
@@ -280,25 +296,22 @@ STAGE PLANS:
TableScan
alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: value is not null (type: boolean)
+ Select Operator
+ expressions: value (type: string), key (type: string)
+ outputColumnNames: value, key
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: value (type: string), key (type: string)
- outputColumnNames: value, key
+ Group By Operator
+ aggregations: count()
+ keys: value (type: string), key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- keys: value (type: string), key (type: string)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: bigint)
+ value expressions: _col2 (type: bigint)
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
@@ -306,19 +319,16 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: _col2 is not null (type: boolean)
+ Select Operator
+ expressions: _col1 (type: string), _col0 (type: string), _col2 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: string), _col0 (type: string), _col2 (type: bigint)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-2
Map Reduce
@@ -332,17 +342,17 @@ STAGE PLANS:
value expressions: _col0 (type: string)
TableScan
Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: bigint)
+ key expressions: _col1 (type: string), _col0 (type: bigint)
sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Map-reduce partition columns: _col1 (type: string), _col0 (type: bigint)
+ Statistics: Num rows: 45 Data size: 479 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col1 (type: string), _col2 (type: bigint)
- 1 _col0 (type: string), _col1 (type: bigint)
+ 1 _col1 (type: string), _col0 (type: bigint)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -353,6 +363,48 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string), key (type: string)
+ outputColumnNames: value, key
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: value (type: string), key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: string)
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
Stage: Stage-3
Map Reduce
Map Operator Tree:
@@ -360,53 +412,108 @@ STAGE PLANS:
alias: s1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key > '9') and value is not null) (type: boolean)
+ predicate: (key > '9') (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: value (type: string), key (type: string)
- outputColumnNames: value, key
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- keys: value (type: string), key (type: string)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: bigint)
+ value expressions: _col0 (type: string)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: string), _col0 (type: string)
+ outputColumnNames: _col2, _col0
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col2 (type: string), _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: bigint)
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col2 (type: bigint)
- outputColumnNames: _col0, _col2
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: _col2 is not null (type: boolean)
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col2 (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string), _col1 (type: bigint)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: string), _col2 (type: bigint)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: bigint)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 45 Data size: 479 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: bigint), _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 45 Data size: 479 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Fetch Operator
@@ -451,8 +558,9 @@ STAGE PLANS:
TableScan
alias: b
Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: p_mfgr is not null (type: boolean)
+ Select Operator
+ expressions: p_mfgr (type: string), p_size (type: int)
+ outputColumnNames: p_mfgr, p_size
Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: avg(p_size)
@@ -495,11 +603,11 @@ STAGE PLANS:
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 5 Data size: 528 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 211 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
@@ -519,8 +627,9 @@ STAGE PLANS:
TableScan
alias: part_subq
Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: p_mfgr is not null (type: boolean)
+ Select Operator
+ expressions: p_mfgr (type: string), p_size (type: int)
+ outputColumnNames: p_mfgr, p_size
Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: max(p_size), min(p_size)
@@ -550,9 +659,9 @@ STAGE PLANS:
Statistics: Num rows: 5 Data size: 528 Basic stats: COMPLETE Column stats: NONE
Group By Operator
keys: _col0 (type: string)
- mode: hash
+ mode: complete
outputColumnNames: _col0
- Statistics: Num rows: 5 Data size: 528 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 211 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -601,8 +710,9 @@ STAGE PLANS:
TableScan
alias: b
Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: p_mfgr is not null (type: boolean)
+ Select Operator
+ expressions: p_mfgr (type: string), p_size (type: int)
+ outputColumnNames: p_mfgr, p_size
Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: avg(p_size)
@@ -619,8 +729,9 @@ STAGE PLANS:
TableScan
alias: part_subq
Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: p_mfgr is not null (type: boolean)
+ Select Operator
+ expressions: p_mfgr (type: string), p_size (type: int)
+ outputColumnNames: p_mfgr, p_size
Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: max(p_size), min(p_size)
@@ -644,10 +755,10 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE
Mux Operator
- Statistics: Num rows: 40 Data size: 4230 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 35 Data size: 3701 Basic stats: COMPLETE Column stats: NONE
Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
@@ -673,23 +784,28 @@ STAGE PLANS:
expressions: _col0 (type: string)
outputColumnNames: _col0
Statistics: Num rows: 10 Data size: 1057 Basic stats: COMPLETE Column stats: NONE
- Mux Operator
- Statistics: Num rows: 40 Data size: 4230 Basic stats: COMPLETE Column stats: NONE
- Join Operator
- condition map:
- Left Semi Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- File Output Operator
- compressed: false
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 5 Data size: 528 Basic stats: COMPLETE Column stats: NONE
+ Mux Operator
+ Statistics: Num rows: 35 Data size: 3701 Basic stats: COMPLETE Column stats: NONE
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -765,60 +881,84 @@ STAGE PLANS:
TableScan
alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (key > '8') (type: boolean)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
TableScan
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (key > '8') (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string)
+ Group By Operator
+ keys: key (type: string)
+ mode: hash
outputColumnNames: _col0
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
- Join Operator
- condition map:
- Left Semi Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ Demux Operator
+ Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
+ Mux Operator
+ Statistics: Num rows: 999 Data size: 10612 Basic stats: COMPLETE Column stats: NONE
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: string), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Group By Operator
- aggregations: count()
- keys: _col0 (type: string), _col1 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+ Mux Operator
+ Statistics: Num rows: 999 Data size: 10612 Basic stats: COMPLETE Column stats: NONE
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: string), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-2
Map Reduce
@@ -828,7 +968,7 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
value expressions: _col2 (type: bigint)
Reduce Operator Tree:
Group By Operator
@@ -836,16 +976,13 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: _col2 is not null (type: boolean)
- Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-3
Map Reduce
@@ -855,7 +992,7 @@ STAGE PLANS:
key expressions: _col2 (type: bigint)
sort order: +
Map-reduce partition columns: _col2 (type: bigint)
- Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
value expressions: _col0 (type: string), _col1 (type: string)
TableScan
Reduce Output Operator
@@ -864,21 +1001,47 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: bigint)
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
- Join Operator
- condition map:
- Left Semi Join 0 to 1
- keys:
- 0 _col2 (type: bigint)
- 1 _col0 (type: bigint)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Demux Operator
+ Statistics: Num rows: 84 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Mux Operator
+ Statistics: Num rows: 126 Data size: 1321 Basic stats: COMPLETE Column stats: NONE
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: bigint)
+ 1 _col0 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Group By Operator
+ keys: KEY._col0 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 42 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+ Mux Operator
+ Statistics: Num rows: 126 Data size: 1321 Basic stats: COMPLETE Column stats: NONE
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: bigint)
+ 1 _col0 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-5
Map Reduce
@@ -912,24 +1075,17 @@ STAGE PLANS:
expressions: _col1 (type: bigint)
outputColumnNames: _col1
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: _col1 is not null (type: boolean)
+ Group By Operator
+ keys: _col1 (type: bigint)
+ mode: hash
+ outputColumnNames: _col0
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: bigint)
- outputColumnNames: _col0
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: bigint)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Fetch Operator
@@ -966,60 +1122,91 @@ group by key, value
having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key )
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-3 depends on stages: Stage-2, Stage-5
Stage-5 is a root stage
- Stage-7 depends on stages: Stage-2, Stage-5 , consists of Stage-9, Stage-3
- Stage-9 has a backup stage: Stage-3
- Stage-6 depends on stages: Stage-9
- Stage-3
- Stage-10 is a root stage
- Stage-2 depends on stages: Stage-10
- Stage-0 depends on stages: Stage-6, Stage-3
+ Stage-0 depends on stages: Stage-3
STAGE PLANS:
- Stage: Stage-5
+ Stage: Stage-1
Map Reduce
Map Operator Tree:
TableScan
- alias: s1
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ TableScan
+ alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key > '9') (type: boolean)
+ predicate: (key > '8') (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count()
keys: key (type: string)
mode: hash
- outputColumnNames: _col0, _col1
+ outputColumnNames: _col0
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: bigint)
- outputColumnNames: _col1
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: _col1 is not null (type: boolean)
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: bigint)
- outputColumnNames: _col0
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Demux Operator
+ Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
+ Mux Operator
+ Statistics: Num rows: 999 Data size: 10612 Basic stats: COMPLETE Column stats: NONE
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: string), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+ Mux Operator
+ Statistics: Num rows: 999 Data size: 10612 Basic stats: COMPLETE Column stats: NONE
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Group By Operator
- keys: _col0 (type: bigint)
+ aggregations: count()
+ keys: _col0 (type: string), _col1 (type: string)
mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
table:
@@ -1027,44 +1214,29 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- Stage: Stage-7
- Conditional Operator
-
- Stage: Stage-9
- Map Reduce Local Work
- Alias -> Map Local Tables:
- $INTNAME1
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- $INTNAME1
- TableScan
- HashTable Sink Operator
- keys:
- 0 _col2 (type: bigint)
- 1 _col0 (type: bigint)
-
- Stage: Stage-6
+ Stage: Stage-2
Map Reduce
Map Operator Tree:
TableScan
- Map Join Operator
- condition map:
- Left Semi Join 0 to 1
- keys:
- 0 _col2 (type: bigint)
- 1 _col0 (type: bigint)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Local Work:
- Map Reduce Local Work
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ value expressions: _col2 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-3
Map Reduce
@@ -1074,7 +1246,7 @@ STAGE PLANS:
key expressions: _col2 (type: bigint)
sort order: +
Map-reduce partition columns: _col2 (type: bigint)
- Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
value expressions: _col0 (type: string), _col1 (type: string)
TableScan
Reduce Output Operator
@@ -1083,101 +1255,91 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: bigint)
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
- Join Operator
- condition map:
- Left Semi Join 0 to 1
- keys:
- 0 _col2 (type: bigint)
- 1 _col0 (type: bigint)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-10
- Map Reduce Local Work
- Alias -> Map Local Tables:
- $hdt$_0:$hdt$_1:src
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- $hdt$_0:$hdt$_1:src
- TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (key > '8') (type: boolean)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- HashTable Sink Operator
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
+ Demux Operator
+ Statistics: Num rows: 84 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Mux Operator
+ Statistics: Num rows: 126 Data size: 1321 Basic stats: COMPLETE Column stats: NONE
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: bigint)
+ 1 _col0 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Group By Operator
+ keys: KEY._col0 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 42 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+ Mux Operator
+ Statistics: Num rows: 126 Data size: 1321 Basic stats: COMPLETE Column stats: NONE
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: bigint)
+ 1 _col0 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Stage: Stage-2
+ Stage: Stage-5
Map Reduce
Map Operator Tree:
TableScan
- alias: b
+ alias: s1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key > '8') (type: boolean)
+ predicate: (key > '9') (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
+ Group By Operator
+ aggregations: count()
+ keys: key (type: string)
+ mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Left Semi Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- keys: _col0 (type: string), _col1 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: bigint)
- Local Work:
- Map Reduce Local Work
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
- keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ keys: KEY._col0 (type: string)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: _col2 is not null (type: boolean)
- Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: bigint)
+ outputColumnNames: _col1
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: bigint)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Fetch Operator
@@ -1203,12 +1365,9 @@ having p_name in
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-5 depends on stages: Stage-1, Stage-3 , consists of Stage-6, Stage-2
- Stage-6 has a backup stage: Stage-2
- Stage-4 depends on stages: Stage-6
- Stage-2
+ Stage-2 depends on stages: Stage-1, Stage-3
Stage-3 is a root stage
- Stage-0 depends on stages: Stage-4, Stage-2
+ Stage-0 depends on stages: Stage-2
STAGE PLANS:
Stage: Stage-1
@@ -1217,8 +1376,9 @@ STAGE PLANS:
TableScan
alias: part_subq
Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: p_name is not null (type: boolean)
+ Select Operator
+ expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int)
+ outputColumnNames: p_name, p_mfgr, p_size
Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: avg(p_size)
@@ -1250,45 +1410,6 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- Stage: Stage-5
- Conditional Operator
-
- Stage: Stage-6
- Map Reduce Local Work
- Alias -> Map Local Tables:
- $INTNAME1
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- $INTNAME1
- TableScan
- HashTable Sink Operator
- keys:
- 0 _col1 (type: string)
- 1 _col0 (type: string)
-
- Stage: Stage-4
- Map Reduce
- Map Operator Tree:
- TableScan
- Map Join Operator
- condition map:
- Left Semi Join 0 to 1
- keys:
- 0 _col1 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 16 Data size: 3490 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 16 Data size: 3490 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Local Work:
- Map Reduce Local Work
-
Stage: Stage-2
Map Reduce
Map Operator Tree:
@@ -1306,21 +1427,47 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
- Join Operator
- condition map:
- Left Semi Join 0 to 1
- keys:
- 0 _col1 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 16 Data size: 3490 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 16 Data size: 3490 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Demux Operator
+ Statistics: Num rows: 22 Data size: 4653 Basic stats: COMPLETE Column stats: NONE
+ Mux Operator
+ Statistics: Num rows: 33 Data size: 6979 Basic stats: COMPLETE Column stats: NONE
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 11 Data size: 2326 Basic stats: COMPLETE Column stats: NONE
+ Mux Operator
+ Statistics: Num rows: 33 Data size: 6979 Basic stats: COMPLETE Column stats: NONE
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-3
Map Reduce
@@ -1359,24 +1506,21 @@ STAGE PLANS:
window function: GenericUDAFFirstValueEvaluator
window frame: PRECEDING(MAX)~
Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: first_value_window_0 is not null (type: boolean)
+ Select Operator
+ expressions: first_value_window_0 (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: first_value_window_0 (type: string)
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
outputColumnNames: _col0
Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientpositive/subquery_notexists.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/subquery_notexists.q.out b/ql/src/test/results/clientpositive/subquery_notexists.q.out
index ede7855..6ec3b46 100644
--- a/ql/src/test/results/clientpositive/subquery_notexists.q.out
+++ b/ql/src/test/results/clientpositive/subquery_notexists.q.out
@@ -19,11 +19,14 @@ where not exists
)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-4 is a root stage
+ Stage-2 depends on stages: Stage-4
+ Stage-3 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-3
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-1
+ Stage: Stage-4
Map Reduce
Map Operator Tree:
TableScan
@@ -31,13 +34,34 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ outputColumnNames: key, value
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Group By Operator
+ keys: key (type: string), value (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
TableScan
alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -45,25 +69,100 @@ STAGE PLANS:
predicate: (value > 'val_2') (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: value (type: string), key (type: string)
+ expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col1 (type: string), _col0 (type: string)
+ key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
- Map-reduce partition columns: _col1 (type: string), _col0 (type: string)
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string), _col1 (type: string)
+ 1 _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col2, _col3
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col2 (type: string), _col3 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), true (type: boolean)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: boolean)
Reduce Operator Tree:
Join Operator
condition map:
Left Outer Join0 to 1
keys:
0 _col0 (type: string), _col1 (type: string)
- 1 _col1 (type: string), _col0 (type: string)
- outputColumnNames: _col0, _col1, _col3
+ 1 _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1, _col4
Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: _col3 is null (type: boolean)
+ predicate: _col4 is null (type: boolean)
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: string)
@@ -243,11 +342,46 @@ where not exists
)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
+ Stage-4 is a root stage
+ Stage-2 depends on stages: Stage-4
+ Stage-3 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-3
Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string)
+ outputColumnNames: value
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: value (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
Stage: Stage-2
Map Reduce
Map Operator Tree:
@@ -258,35 +392,80 @@ STAGE PLANS:
predicate: (value > 'val_2') (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: value (type: string), key (type: string)
- outputColumnNames: value, key
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: value (type: string), key (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: string), _col0 (type: string)
+ outputColumnNames: _col2, _col0
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col2 (type: string), _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string)
outputColumnNames: _col1
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: string)
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), true (type: boolean)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-1
Map Reduce
@@ -306,17 +485,18 @@ STAGE PLANS:
value expressions: _col0 (type: string)
TableScan
Reduce Output Operator
- key expressions: _col1 (type: string)
+ key expressions: _col0 (type: string)
sort order: +
- Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: boolean)
Reduce Operator Tree:
Join Operator
condition map:
Left Outer Join0 to 1
keys:
0 _col1 (type: string)
- 1 _col1 (type: string)
+ 1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col3
Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
Filter Operator
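For context, the plan diffs in subquery_notexists.q.out reflect what HIVE-15192 changes: with Calcite-based de-correlation, a correlated NOT EXISTS predicate is planned as a left outer join against the de-duplicated subquery rows (carrying a constant true marker) followed by an IS NULL filter, which is why the new plans gain Group By stages and a `_col4 is null` / boolean-column filter. A rough sketch of the transformation on the src_cbo tables used by this test (the rewritten form is illustrative of the planner's logical rewrite, not a literal reproduction of the plan):

```sql
-- Correlated form, as exercised by the test:
select *
from src_cbo b
where not exists
  (select a.key
   from src_cbo a
   where b.value = a.value and a.value > 'val_2');

-- Approximate de-correlated equivalent: join against the distinct
-- matching values, tag them with a constant marker, and keep only
-- the outer rows that found no match.
select b.key, b.value
from src_cbo b
left outer join
  (select distinct a.value as value, true as marker
   from src_cbo a
   where a.value > 'val_2') sq
on b.value = sq.value
where sq.marker is null;
```

The extra Stage-2/Stage-3 jobs in the new plans correspond to computing the distinct subquery side and its marker column before the outer join.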
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
index 70ec02f..fa54bb7 100644
--- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
@@ -1803,46 +1803,71 @@ POSTHOOK: type: QUERY
Plan optimized by CBO.
Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
-Reducer 4 <- Map 3 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
+Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
+Reducer 7 <- Map 6 (SIMPLE_EDGE)
Stage-0
Fetch Operator
limit:-1
Stage-1
Reducer 2 llap
- File Output Operator [FS_14]
- Select Operator [SEL_13] (rows=1 width=178)
+ File Output Operator [FS_29]
+ Select Operator [SEL_28] (rows=1 width=178)
Output:["_col0","_col1"]
- Filter Operator [FIL_12] (rows=1 width=269)
+ Filter Operator [FIL_27] (rows=1 width=182)
predicate:_col3 is null
- Merge Join Operator [MERGEJOIN_17] (rows=500 width=269)
- Conds:RS_9._col1=RS_10._col1(Left Outer),Output:["_col0","_col1","_col3"]
+ Merge Join Operator [MERGEJOIN_37] (rows=500 width=182)
+ Conds:RS_24._col1=RS_25._col0(Left Outer),Output:["_col0","_col1","_col3"]
<-Map 1 [SIMPLE_EDGE] llap
- SHUFFLE [RS_9]
+ SHUFFLE [RS_24]
PartitionCols:_col1
Select Operator [SEL_1] (rows=500 width=178)
Output:["_col0","_col1"]
TableScan [TS_0] (rows=500 width=178)
default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
- <-Reducer 4 [SIMPLE_EDGE] llap
- SHUFFLE [RS_10]
- PartitionCols:_col1
- Select Operator [SEL_8] (rows=83 width=178)
- Output:["_col1"]
- Group By Operator [GBY_7] (rows=83 width=178)
- Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
- <-Map 3 [SIMPLE_EDGE] llap
- SHUFFLE [RS_6]
- PartitionCols:_col0, _col1
- Group By Operator [GBY_5] (rows=83 width=178)
- Output:["_col0","_col1"],keys:value, key
- Select Operator [SEL_4] (rows=166 width=178)
- Output:["value","key"]
- Filter Operator [FIL_16] (rows=166 width=178)
- predicate:(value > 'val_2')
- TableScan [TS_2] (rows=500 width=178)
- default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+ <-Reducer 5 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_25]
+ PartitionCols:_col0
+ Select Operator [SEL_23] (rows=56 width=95)
+ Output:["_col0","_col1"]
+ Group By Operator [GBY_22] (rows=56 width=91)
+ Output:["_col0"],keys:_col1
+ Select Operator [SEL_18] (rows=83 width=178)
+ Output:["_col1"]
+ Group By Operator [GBY_17] (rows=83 width=178)
+ Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
+ <-Reducer 4 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_16]
+ PartitionCols:_col0
+ Group By Operator [GBY_15] (rows=83 width=178)
+ Output:["_col0","_col1"],keys:_col2, _col0
+ Select Operator [SEL_14] (rows=166 width=178)
+ Output:["_col2","_col0"]
+ Merge Join Operator [MERGEJOIN_36] (rows=166 width=178)
+ Conds:RS_11._col1=RS_12._col0(Inner),Output:["_col0","_col2"]
+ <-Map 3 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_11]
+ PartitionCols:_col1
+ Select Operator [SEL_4] (rows=166 width=178)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_34] (rows=166 width=178)
+ predicate:(value > 'val_2')
+ TableScan [TS_2] (rows=500 width=178)
+ default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+ <-Reducer 7 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_12]
+ PartitionCols:_col0
+ Group By Operator [GBY_9] (rows=214 width=91)
+ Output:["_col0"],keys:KEY._col0
+ <-Map 6 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_8]
+ PartitionCols:_col0
+ Group By Operator [GBY_7] (rows=214 width=91)
+ Output:["_col0"],keys:value
+ TableScan [TS_5] (rows=500 width=91)
+ default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["value"]
PREHOOK: query: explain select *
from src_cbo b
@@ -1866,31 +1891,25 @@ Plan optimized by CBO.
Vertex dependency in root stage
Reducer 2 <- Map 1 (SIMPLE_EDGE)
-Reducer 3 <- Map 4 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
+Reducer 5 <- Map 4 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
+Reducer 8 <- Map 7 (SIMPLE_EDGE)
Stage-0
Fetch Operator
limit:-1
Stage-1
Reducer 3 llap
- File Output Operator [FS_14]
- Select Operator [SEL_13] (rows=1 width=178)
+ File Output Operator [FS_33]
+ Select Operator [SEL_32] (rows=1 width=178)
Output:["_col0","_col1"]
- Filter Operator [FIL_12] (rows=1 width=265)
- predicate:_col3 is null
- Merge Join Operator [MERGEJOIN_17] (rows=250 width=265)
- Conds:RS_9._col0, _col1=RS_10._col1, _col0(Left Outer),Output:["_col0","_col1","_col3"]
- <-Map 4 [SIMPLE_EDGE] llap
- SHUFFLE [RS_10]
- PartitionCols:_col1, _col0
- Select Operator [SEL_8] (rows=166 width=178)
- Output:["_col0","_col1"]
- Filter Operator [FIL_16] (rows=166 width=178)
- predicate:(value > 'val_12')
- TableScan [TS_6] (rows=500 width=178)
- default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+ Filter Operator [FIL_31] (rows=1 width=182)
+ predicate:_col4 is null
+ Merge Join Operator [MERGEJOIN_41] (rows=250 width=182)
+ Conds:RS_28._col0, _col1=RS_29._col0, _col1(Left Outer),Output:["_col0","_col1","_col4"]
<-Reducer 2 [SIMPLE_EDGE] llap
- SHUFFLE [RS_9]
+ SHUFFLE [RS_28]
PartitionCols:_col0, _col1
Group By Operator [GBY_4] (rows=250 width=178)
Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
@@ -1903,6 +1922,43 @@ Stage-0
Output:["key","value"]
TableScan [TS_0] (rows=500 width=178)
default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+ <-Reducer 6 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_29]
+ PartitionCols:_col0, _col1
+ Select Operator [SEL_27] (rows=1 width=182)
+ Output:["_col0","_col1","_col2"]
+ Group By Operator [GBY_26] (rows=1 width=178)
+ Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
+ <-Reducer 5 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_25]
+ PartitionCols:_col0, _col1
+ Group By Operator [GBY_24] (rows=1 width=178)
+ Output:["_col0","_col1"],keys:_col2, _col3
+ Merge Join Operator [MERGEJOIN_40] (rows=1 width=178)
+ Conds:RS_20._col0, _col1=RS_21._col0, _col1(Inner),Output:["_col2","_col3"]
+ <-Map 4 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_20]
+ PartitionCols:_col0, _col1
+ Select Operator [SEL_8] (rows=166 width=178)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_38] (rows=166 width=178)
+ predicate:(value > 'val_12')
+ TableScan [TS_6] (rows=500 width=178)
+ default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+ <-Reducer 8 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_21]
+ PartitionCols:_col0, _col1
+ Group By Operator [GBY_18] (rows=250 width=178)
+ Output:["_col0","_col1"],keys:_col0, _col1
+ Group By Operator [GBY_13] (rows=250 width=178)
+ Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
+ <-Map 7 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_12]
+ PartitionCols:_col0, _col1
+ Group By Operator [GBY_11] (rows=250 width=178)
+ Output:["_col0","_col1"],keys:key, value
+ TableScan [TS_9] (rows=500 width=178)
+ default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
PREHOOK: query: create view cv1 as
select *
@@ -1933,36 +1989,59 @@ POSTHOOK: type: QUERY
Plan optimized by CBO.
Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
+Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
+Reducer 7 <- Map 6 (SIMPLE_EDGE)
Stage-0
Fetch Operator
limit:-1
Stage-1
Reducer 2 llap
- File Output Operator [FS_12]
- Merge Join Operator [MERGEJOIN_17] (rows=2 width=178)
- Conds:RS_8._col0, _col1=RS_9._col0, _col1(Left Semi),Output:["_col0","_col1"]
+ File Output Operator [FS_23]
+ Merge Join Operator [MERGEJOIN_33] (rows=1 width=178)
+ Conds:RS_19._col0, _col1=RS_20._col0, _col1(Inner),Output:["_col0","_col1"]
<-Map 1 [SIMPLE_EDGE] llap
- SHUFFLE [RS_8]
+ SHUFFLE [RS_19]
PartitionCols:_col0, _col1
- Select Operator [SEL_2] (rows=166 width=178)
+ Select Operator [SEL_1] (rows=500 width=178)
Output:["_col0","_col1"]
- Filter Operator [FIL_15] (rows=166 width=178)
- predicate:((value > 'val_9') and key is not null)
- TableScan [TS_0] (rows=500 width=178)
- default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"],properties:{"insideView":"TRUE"}
- <-Map 3 [SIMPLE_EDGE] llap
- SHUFFLE [RS_9]
+ TableScan [TS_0] (rows=500 width=178)
+ default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"],properties:{"insideView":"TRUE"}
+ <-Reducer 5 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_20]
PartitionCols:_col0, _col1
- Group By Operator [GBY_7] (rows=83 width=178)
- Output:["_col0","_col1"],keys:_col0, _col1
- Select Operator [SEL_5] (rows=166 width=178)
- Output:["_col0","_col1"]
- Filter Operator [FIL_16] (rows=166 width=178)
- predicate:((value > 'val_9') and key is not null)
- TableScan [TS_3] (rows=500 width=178)
- default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+ Group By Operator [GBY_17] (rows=1 width=178)
+ Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
+ <-Reducer 4 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_16]
+ PartitionCols:_col0, _col1
+ Group By Operator [GBY_15] (rows=1 width=178)
+ Output:["_col0","_col1"],keys:_col2, _col3
+ Merge Join Operator [MERGEJOIN_32] (rows=1 width=178)
+ Conds:RS_11._col0, _col1=RS_12._col0, _col1(Inner),Output:["_col2","_col3"]
+ <-Map 3 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_11]
+ PartitionCols:_col0, _col1
+ Select Operator [SEL_4] (rows=166 width=178)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_30] (rows=166 width=178)
+ predicate:(value > 'val_9')
+ TableScan [TS_2] (rows=500 width=178)
+ default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+ <-Reducer 7 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_12]
+ PartitionCols:_col0, _col1
+ Group By Operator [GBY_9] (rows=250 width=178)
+ Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
+ <-Map 6 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_8]
+ PartitionCols:_col0, _col1
+ Group By Operator [GBY_7] (rows=250 width=178)
+ Output:["_col0","_col1"],keys:key, value
+ TableScan [TS_5] (rows=500 width=178)
+ default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"],properties:{"insideView":"TRUE"}
PREHOOK: query: explain select *
from (select *
@@ -1985,36 +2064,59 @@ POSTHOOK: type: QUERY
Plan optimized by CBO.
Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
+Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
+Reducer 7 <- Map 6 (SIMPLE_EDGE)
Stage-0
Fetch Operator
limit:-1
Stage-1
Reducer 2 llap
- File Output Operator [FS_12]
- Merge Join Operator [MERGEJOIN_17] (rows=2 width=178)
- Conds:RS_8._col0, _col1=RS_9._col0, _col1(Left Semi),Output:["_col0","_col1"]
+ File Output Operator [FS_23]
+ Merge Join Operator [MERGEJOIN_33] (rows=1 width=178)
+ Conds:RS_19._col0, _col1=RS_20._col0, _col1(Inner),Output:["_col0","_col1"]
<-Map 1 [SIMPLE_EDGE] llap
- SHUFFLE [RS_8]
+ SHUFFLE [RS_19]
PartitionCols:_col0, _col1
- Select Operator [SEL_2] (rows=166 width=178)
+ Select Operator [SEL_1] (rows=500 width=178)
Output:["_col0","_col1"]
- Filter Operator [FIL_15] (rows=166 width=178)
- predicate:((value > 'val_9') and key is not null)
- TableScan [TS_0] (rows=500 width=178)
- default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
- <-Map 3 [SIMPLE_EDGE] llap
- SHUFFLE [RS_9]
+ TableScan [TS_0] (rows=500 width=178)
+ default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+ <-Reducer 5 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_20]
PartitionCols:_col0, _col1
- Group By Operator [GBY_7] (rows=83 width=178)
- Output:["_col0","_col1"],keys:_col0, _col1
- Select Operator [SEL_5] (rows=166 width=178)
- Output:["_col0","_col1"]
- Filter Operator [FIL_16] (rows=166 width=178)
- predicate:((value > 'val_9') and key is not null)
- TableScan [TS_3] (rows=500 width=178)
- default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+ Group By Operator [GBY_17] (rows=1 width=178)
+ Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
+ <-Reducer 4 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_16]
+ PartitionCols:_col0, _col1
+ Group By Operator [GBY_15] (rows=1 width=178)
+ Output:["_col0","_col1"],keys:_col2, _col3
+ Merge Join Operator [MERGEJOIN_32] (rows=1 width=178)
+ Conds:RS_11._col0, _col1=RS_12._col0, _col1(Inner),Output:["_col2","_col3"]
+ <-Map 3 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_11]
+ PartitionCols:_col0, _col1
+ Select Operator [SEL_4] (rows=166 width=178)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_30] (rows=166 width=178)
+ predicate:(value > 'val_9')
+ TableScan [TS_2] (rows=500 width=178)
+ default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+ <-Reducer 7 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_12]
+ PartitionCols:_col0, _col1
+ Group By Operator [GBY_9] (rows=250 width=178)
+ Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
+ <-Map 6 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_8]
+ PartitionCols:_col0, _col1
+ Group By Operator [GBY_7] (rows=250 width=178)
+ Output:["_col0","_col1"],keys:key, value
+ TableScan [TS_5] (rows=500 width=178)
+ default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
PREHOOK: query: explain select *
from src_cbo
@@ -2027,36 +2129,38 @@ POSTHOOK: type: QUERY
Plan optimized by CBO.
Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+Reducer 4 <- Map 3 (SIMPLE_EDGE)
Stage-0
Fetch Operator
limit:-1
Stage-1
Reducer 2 llap
- File Output Operator [FS_12]
- Merge Join Operator [MERGEJOIN_17] (rows=166 width=178)
- Conds:RS_8._col0=RS_9._col0(Left Semi),Output:["_col0","_col1"]
+ File Output Operator [FS_13]
+ Merge Join Operator [MERGEJOIN_18] (rows=168 width=178)
+ Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col0","_col1"]
<-Map 1 [SIMPLE_EDGE] llap
- SHUFFLE [RS_8]
+ SHUFFLE [RS_9]
PartitionCols:_col0
- Select Operator [SEL_2] (rows=166 width=178)
+ Select Operator [SEL_1] (rows=500 width=178)
Output:["_col0","_col1"]
- Filter Operator [FIL_15] (rows=166 width=178)
- predicate:(key > '9')
- TableScan [TS_0] (rows=500 width=178)
- default@src_cbo,src_cbo,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
- <-Map 3 [SIMPLE_EDGE] llap
- SHUFFLE [RS_9]
+ TableScan [TS_0] (rows=500 width=178)
+ default@src_cbo,src_cbo,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+ <-Reducer 4 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_10]
PartitionCols:_col0
Group By Operator [GBY_7] (rows=69 width=87)
- Output:["_col0"],keys:_col0
- Select Operator [SEL_5] (rows=166 width=87)
- Output:["_col0"]
- Filter Operator [FIL_16] (rows=166 width=87)
- predicate:(key > '9')
- TableScan [TS_3] (rows=500 width=87)
- default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
+ Output:["_col0"],keys:KEY._col0
+ <-Map 3 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_6]
+ PartitionCols:_col0
+ Group By Operator [GBY_5] (rows=69 width=87)
+ Output:["_col0"],keys:key
+ Filter Operator [FIL_17] (rows=166 width=87)
+ predicate:(key > '9')
+ TableScan [TS_2] (rows=500 width=87)
+ default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
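The hunk above shows the companion change for IN subqueries: the old plan used a Left Semi join, while the new plan de-duplicates the subquery side with a Group By and then performs a plain Inner join, which is semantically equivalent once duplicates are removed. A sketch of the rewrite on the test's tables (illustrative, not the literal plan):

```sql
-- IN subquery as written in the test:
select *
from src_cbo
where key in (select key from src_cbo s1 where s1.key > '9');

-- Equivalent form the new plan corresponds to: inner join against
-- the de-duplicated subquery keys instead of a left semi join.
select c.key, c.value
from src_cbo c
join (select distinct key
      from src_cbo s1
      where s1.key > '9') d
on c.key = d.key;
```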
PREHOOK: query: explain select p.p_partkey, li.l_suppkey
from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
@@ -2071,59 +2175,109 @@ POSTHOOK: type: QUERY
Plan optimized by CBO.
Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
-Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
-Reducer 6 <- Map 5 (SIMPLE_EDGE)
+Reducer 10 <- Map 9 (SIMPLE_EDGE)
+Reducer 11 <- Map 13 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE)
+Reducer 12 <- Reducer 11 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE)
+Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
Stage-0
Fetch Operator
limit:-1
Stage-1
- Reducer 3 llap
- File Output Operator [FS_22]
- Select Operator [SEL_21] (rows=4 width=8)
+ Reducer 4 llap
+ File Output Operator [FS_46]
+ Select Operator [SEL_45] (rows=5 width=8)
Output:["_col0","_col1"]
- Merge Join Operator [MERGEJOIN_32] (rows=4 width=8)
- Conds:RS_18._col1=RS_19._col0(Inner),Output:["_col2","_col4"]
- <-Reducer 2 [SIMPLE_EDGE] llap
- SHUFFLE [RS_18]
- PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_31] (rows=13 width=8)
- Conds:RS_15._col0, 1=RS_16._col0, _col1(Left Semi),Output:["_col1","_col2"]
- <-Map 1 [SIMPLE_EDGE] llap
- SHUFFLE [RS_15]
- PartitionCols:_col0, 1
- Select Operator [SEL_2] (rows=17 width=16)
- Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_28] (rows=17 width=16)
- predicate:((l_linenumber = 1) and l_partkey is not null and l_orderkey is not null)
- TableScan [TS_0] (rows=100 width=16)
- default@lineitem,li,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey","l_partkey","l_suppkey","l_linenumber"]
- <-Map 4 [SIMPLE_EDGE] llap
- SHUFFLE [RS_16]
- PartitionCols:_col0, _col1
- Group By Operator [GBY_14] (rows=4 width=8)
- Output:["_col0","_col1"],keys:_col0, _col1
- Select Operator [SEL_5] (rows=14 width=8)
- Output:["_col0","_col1"]
- Filter Operator [FIL_29] (rows=14 width=96)
- predicate:((l_shipmode = 'AIR') and (l_linenumber = 1) and l_orderkey is not null)
- TableScan [TS_3] (rows=100 width=96)
- default@lineitem,lineitem,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey","l_linenumber","l_shipmode"]
- <-Reducer 6 [SIMPLE_EDGE] llap
- SHUFFLE [RS_19]
- PartitionCols:_col0
- Group By Operator [GBY_11] (rows=50 width=4)
- Output:["_col0"],keys:KEY._col0
+ Merge Join Operator [MERGEJOIN_67] (rows=5 width=8)
+ Conds:RS_42._col1, _col4=RS_43._col0, _col1(Inner),Output:["_col0","_col3"]
+ <-Reducer 3 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_42]
+ PartitionCols:_col1, _col4
+ Merge Join Operator [MERGEJOIN_64] (rows=5 width=16)
+ Conds:RS_39._col0=RS_40._col1(Inner),Output:["_col0","_col1","_col3","_col4"]
<-Map 5 [SIMPLE_EDGE] llap
- SHUFFLE [RS_10]
+ SHUFFLE [RS_40]
+ PartitionCols:_col1
+ Select Operator [SEL_9] (rows=17 width=16)
+ Output:["_col0","_col1","_col2","_col3"]
+ Filter Operator [FIL_60] (rows=17 width=16)
+ predicate:((l_linenumber = 1) and l_partkey is not null)
+ TableScan [TS_7] (rows=100 width=16)
+ default@lineitem,li,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey","l_partkey","l_suppkey","l_linenumber"]
+ <-Reducer 2 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_39]
PartitionCols:_col0
- Group By Operator [GBY_9] (rows=50 width=4)
- Output:["_col0"],keys:l_partkey
- Filter Operator [FIL_30] (rows=100 width=4)
- predicate:l_partkey is not null
- TableScan [TS_6] (rows=100 width=4)
- default@lineitem,lineitem,Tbl:COMPLETE,Col:COMPLETE,Output:["l_partkey"]
+ Group By Operator [GBY_5] (rows=50 width=4)
+ Output:["_col0"],keys:KEY._col0
+ <-Map 1 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_4]
+ PartitionCols:_col0
+ Group By Operator [GBY_3] (rows=50 width=4)
+ Output:["_col0"],keys:l_partkey
+ Filter Operator [FIL_59] (rows=100 width=4)
+ predicate:l_partkey is not null
+ TableScan [TS_0] (rows=100 width=4)
+ default@lineitem,lineitem,Tbl:COMPLETE,Col:COMPLETE,Output:["l_partkey"]
+ <-Reducer 8 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_43]
+ PartitionCols:_col0, _col1
+ Group By Operator [GBY_37] (rows=4 width=8)
+ Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
+ <-Reducer 7 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_36]
+ PartitionCols:_col0, _col1
+ Group By Operator [GBY_35] (rows=4 width=8)
+ Output:["_col0","_col1"],keys:_col0, _col3
+ Merge Join Operator [MERGEJOIN_66] (rows=14 width=8)
+ Conds:RS_31._col1=RS_32._col0(Inner),Output:["_col0","_col3"]
+ <-Map 6 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_31]
+ PartitionCols:_col1
+ Select Operator [SEL_12] (rows=14 width=95)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_61] (rows=14 width=96)
+ predicate:(l_shipmode = 'AIR')
+ TableScan [TS_10] (rows=100 width=96)
+ default@lineitem,lineitem,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey","l_linenumber","l_shipmode"]
+ <-Reducer 12 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_32]
+ PartitionCols:_col0
+ Group By Operator [GBY_29] (rows=3 width=4)
+ Output:["_col0"],keys:KEY._col0
+ <-Reducer 11 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_28]
+ PartitionCols:_col0
+ Group By Operator [GBY_27] (rows=3 width=4)
+ Output:["_col0"],keys:_col2
+ Merge Join Operator [MERGEJOIN_65] (rows=34 width=4)
+ Conds:RS_23._col0=RS_24._col0(Inner),Output:["_col2"]
+ <-Map 13 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_24]
+ PartitionCols:_col0
+ Select Operator [SEL_22] (rows=100 width=8)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_63] (rows=100 width=8)
+ predicate:l_partkey is not null
+ TableScan [TS_20] (rows=100 width=8)
+ default@lineitem,li,Tbl:COMPLETE,Col:COMPLETE,Output:["l_partkey","l_linenumber"]
+ <-Reducer 10 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_23]
+ PartitionCols:_col0
+ Group By Operator [GBY_18] (rows=50 width=4)
+ Output:["_col0"],keys:KEY._col0
+ <-Map 9 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_17]
+ PartitionCols:_col0
+ Group By Operator [GBY_16] (rows=50 width=4)
+ Output:["_col0"],keys:l_partkey
+ Filter Operator [FIL_62] (rows=100 width=4)
+ predicate:l_partkey is not null
+ TableScan [TS_13] (rows=100 width=4)
+ default@lineitem,lineitem,Tbl:COMPLETE,Col:COMPLETE,Output:["l_partkey"]
PREHOOK: query: explain select key, value, count(*)
from src_cbo b
@@ -2140,72 +2294,74 @@ POSTHOOK: type: QUERY
Plan optimized by CBO.
Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
-Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
-Reducer 7 <- Map 6 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
+Reducer 6 <- Map 5 (SIMPLE_EDGE)
+Reducer 8 <- Map 7 (SIMPLE_EDGE)
+Reducer 9 <- Reducer 8 (SIMPLE_EDGE)
Stage-0
Fetch Operator
limit:-1
Stage-1
Reducer 4 llap
- File Output Operator [FS_31]
- Merge Join Operator [MERGEJOIN_44] (rows=34 width=186)
- Conds:RS_27._col2=RS_28._col0(Left Semi),Output:["_col0","_col1","_col2"]
+ File Output Operator [FS_33]
+ Merge Join Operator [MERGEJOIN_46] (rows=34 width=186)
+ Conds:RS_29._col2=RS_30._col0(Inner),Output:["_col0","_col1","_col2"]
<-Reducer 3 [SIMPLE_EDGE] llap
- SHUFFLE [RS_27]
+ SHUFFLE [RS_29]
PartitionCols:_col2
- Filter Operator [FIL_37] (rows=83 width=186)
- predicate:_col2 is not null
- Group By Operator [GBY_14] (rows=83 width=186)
- Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1
- <-Reducer 2 [SIMPLE_EDGE] llap
- SHUFFLE [RS_13]
- PartitionCols:_col0, _col1
- Group By Operator [GBY_12] (rows=83 width=186)
- Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col0, _col1
- Merge Join Operator [MERGEJOIN_43] (rows=166 width=178)
- Conds:RS_8._col0=RS_9._col0(Left Semi),Output:["_col0","_col1"]
- <-Map 1 [SIMPLE_EDGE] llap
- SHUFFLE [RS_8]
- PartitionCols:_col0
- Select Operator [SEL_2] (rows=166 width=178)
- Output:["_col0","_col1"]
- Filter Operator [FIL_38] (rows=166 width=178)
- predicate:(key > '8')
- TableScan [TS_0] (rows=500 width=178)
- default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
- <-Map 5 [SIMPLE_EDGE] llap
- SHUFFLE [RS_9]
- PartitionCols:_col0
- Group By Operator [GBY_7] (rows=69 width=87)
- Output:["_col0"],keys:_col0
- Select Operator [SEL_5] (rows=166 width=87)
- Output:["_col0"]
- Filter Operator [FIL_39] (rows=166 width=87)
+ Group By Operator [GBY_15] (rows=84 width=186)
+ Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1
+ <-Reducer 2 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_14]
+ PartitionCols:_col0, _col1
+ Group By Operator [GBY_13] (rows=84 width=186)
+ Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col0, _col1
+ Merge Join Operator [MERGEJOIN_45] (rows=168 width=178)
+ Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col0","_col1"]
+ <-Map 1 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_9]
+ PartitionCols:_col0
+ Select Operator [SEL_1] (rows=500 width=178)
+ Output:["_col0","_col1"]
+ TableScan [TS_0] (rows=500 width=178)
+ default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+ <-Reducer 6 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_10]
+ PartitionCols:_col0
+ Group By Operator [GBY_7] (rows=69 width=87)
+ Output:["_col0"],keys:KEY._col0
+ <-Map 5 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_6]
+ PartitionCols:_col0
+ Group By Operator [GBY_5] (rows=69 width=87)
+ Output:["_col0"],keys:key
+ Filter Operator [FIL_41] (rows=166 width=87)
predicate:(key > '8')
- TableScan [TS_3] (rows=500 width=87)
+ TableScan [TS_2] (rows=500 width=87)
default@src_cbo,src_cbo,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
- <-Reducer 7 [SIMPLE_EDGE] llap
- SHUFFLE [RS_28]
+ <-Reducer 9 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_30]
PartitionCols:_col0
- Group By Operator [GBY_26] (rows=34 width=8)
- Output:["_col0"],keys:_col0
- Select Operator [SEL_24] (rows=69 width=8)
- Output:["_col0"]
- Filter Operator [FIL_40] (rows=69 width=8)
- predicate:_col1 is not null
- Select Operator [SEL_42] (rows=69 width=8)
+ Group By Operator [GBY_27] (rows=34 width=8)
+ Output:["_col0"],keys:KEY._col0
+ <-Reducer 8 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_26]
+ PartitionCols:_col0
+ Group By Operator [GBY_25] (rows=34 width=8)
+ Output:["_col0"],keys:_col1
+ Select Operator [SEL_44] (rows=69 width=8)
Output:["_col1"]
Group By Operator [GBY_22] (rows=69 width=95)
Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0
- <-Map 6 [SIMPLE_EDGE] llap
+ <-Map 7 [SIMPLE_EDGE] llap
SHUFFLE [RS_21]
PartitionCols:_col0
Group By Operator [GBY_20] (rows=69 width=95)
Output:["_col0","_col1"],aggregations:["count()"],keys:key
- Filter Operator [FIL_41] (rows=166 width=87)
+ Filter Operator [FIL_43] (rows=166 width=87)
predicate:(key > '9')
TableScan [TS_17] (rows=500 width=87)
default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
@@ -2226,8 +2382,9 @@ Plan optimized by CBO.
Vertex dependency in root stage
Reducer 2 <- Map 1 (SIMPLE_EDGE)
-Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
Reducer 5 <- Map 4 (SIMPLE_EDGE)
+Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
Stage-0
Fetch Operator
@@ -2236,41 +2393,42 @@ Stage-0
Reducer 3 llap
File Output Operator [FS_21]
Merge Join Operator [MERGEJOIN_26] (rows=6 width=227)
- Conds:RS_17._col1=RS_18._col0(Left Semi),Output:["_col0","_col1","_col2"]
+ Conds:RS_17._col1=RS_18._col0(Inner),Output:["_col0","_col1","_col2"]
<-Reducer 2 [SIMPLE_EDGE] llap
SHUFFLE [RS_17]
PartitionCols:_col1
- Select Operator [SEL_6] (rows=13 width=227)
+ Select Operator [SEL_5] (rows=13 width=227)
Output:["_col0","_col1","_col2"]
- Group By Operator [GBY_5] (rows=13 width=227)
+ Group By Operator [GBY_4] (rows=13 width=227)
Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)"],keys:KEY._col0, KEY._col1
<-Map 1 [SIMPLE_EDGE] llap
- SHUFFLE [RS_4]
+ SHUFFLE [RS_3]
PartitionCols:_col0, _col1
- Group By Operator [GBY_3] (rows=13 width=295)
+ Group By Operator [GBY_2] (rows=13 width=295)
Output:["_col0","_col1","_col2"],aggregations:["avg(p_size)"],keys:p_name, p_mfgr
- Filter Operator [FIL_24] (rows=26 width=223)
- predicate:p_name is not null
- TableScan [TS_0] (rows=26 width=223)
- default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name","p_mfgr","p_size"]
- <-Reducer 5 [SIMPLE_EDGE] llap
+ TableScan [TS_0] (rows=26 width=223)
+ default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name","p_mfgr","p_size"]
+ <-Reducer 6 [SIMPLE_EDGE] llap
SHUFFLE [RS_18]
PartitionCols:_col0
- Group By Operator [GBY_16] (rows=13 width=184)
- Output:["_col0"],keys:_col0
- Select Operator [SEL_11] (rows=26 width=184)
- Output:["_col0"]
- Filter Operator [FIL_25] (rows=26 width=491)
- predicate:first_value_window_0 is not null
- PTF Operator [PTF_10] (rows=26 width=491)
- Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col5 ASC NULLS FIRST","partition by:":"_col2"}]
- Select Operator [SEL_9] (rows=26 width=491)
- Output:["_col1","_col2","_col5"]
- <-Map 4 [SIMPLE_EDGE] llap
- SHUFFLE [RS_8]
- PartitionCols:p_mfgr
- TableScan [TS_7] (rows=26 width=223)
- default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_name","p_size"]
+ Group By Operator [GBY_15] (rows=13 width=184)
+ Output:["_col0"],keys:KEY._col0
+ <-Reducer 5 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_14]
+ PartitionCols:_col0
+ Group By Operator [GBY_13] (rows=13 width=184)
+ Output:["_col0"],keys:_col0
+ Select Operator [SEL_10] (rows=26 width=491)
+ Output:["_col0"]
+ PTF Operator [PTF_9] (rows=26 width=491)
+ Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col5 ASC NULLS FIRST","partition by:":"_col2"}]
+ Select Operator [SEL_8] (rows=26 width=491)
+ Output:["_col1","_col2","_col5"]
+ <-Map 4 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_7]
+ PartitionCols:p_mfgr
+ TableScan [TS_6] (rows=26 width=223)
+ default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_name","p_size"]
PREHOOK: query: explain select *
from src_cbo
@@ -2290,62 +2448,66 @@ Plan optimized by CBO.
Vertex dependency in root stage
Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
-Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
Reducer 6 <- Map 5 (SIMPLE_EDGE)
+Reducer 8 <- Map 7 (SIMPLE_EDGE)
Stage-0
Fetch Operator
limit:-1
Stage-1
Reducer 4 llap
- File Output Operator [FS_25]
- Select Operator [SEL_24] (rows=1 width=178)
+ File Output Operator [FS_27]
+ Select Operator [SEL_26] (rows=250 width=178)
Output:["_col0","_col1"]
<-Reducer 3 [SIMPLE_EDGE] llap
- SHUFFLE [RS_23]
- Select Operator [SEL_22] (rows=1 width=178)
+ SHUFFLE [RS_25]
+ Select Operator [SEL_24] (rows=250 width=178)
Output:["_col0","_col1"]
- Filter Operator [FIL_21] (rows=1 width=265)
- predicate:_col3 is null
- Merge Join Operator [MERGEJOIN_29] (rows=500 width=265)
- Conds:RS_18._col0=RS_19._col0(Left Outer),Output:["_col0","_col1","_col3"]
- <-Map 7 [SIMPLE_EDGE] llap
- SHUFFLE [RS_19]
- PartitionCols:_col0
- Select Operator [SEL_14] (rows=166 width=87)
- Output:["_col0"]
- Filter Operator [FIL_27] (rows=166 width=87)
- predicate:(key > '2')
- TableScan [TS_12] (rows=500 width=87)
- default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
+ Filter Operator [FIL_23] (rows=250 width=198)
+ predicate:(not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END)
+ Merge Join Operator [MERGEJOIN_32] (rows=500 width=198)
+ Conds:RS_20._col0=RS_21._col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5"]
<-Reducer 2 [SIMPLE_EDGE] llap
- SHUFFLE [RS_18]
+ SHUFFLE [RS_20]
PartitionCols:_col0
- Merge Join Operator [MERGEJOIN_28] (rows=500 width=178)
- Conds:(Inner),Output:["_col0","_col1"]
+ Merge Join Operator [MERGEJOIN_31] (rows=500 width=194)
+ Conds:(Inner),Output:["_col0","_col1","_col2","_col3"]
<-Map 1 [SIMPLE_EDGE] llap
- SHUFFLE [RS_15]
+ SHUFFLE [RS_17]
Select Operator [SEL_1] (rows=500 width=178)
Output:["_col0","_col1"]
TableScan [TS_0] (rows=500 width=178)
default@src_cbo,src_cbo,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
<-Reducer 6 [SIMPLE_EDGE] llap
- SHUFFLE [RS_16]
- Select Operator [SEL_11] (rows=1 width=8)
- Filter Operator [FIL_10] (rows=1 width=8)
- predicate:(_col0 = 0)
- Group By Operator [GBY_8] (rows=1 width=8)
- Output:["_col0"],aggregations:["count(VALUE._col0)"]
- <-Map 5 [SIMPLE_EDGE] llap
- SHUFFLE [RS_7]
- Group By Operator [GBY_6] (rows=1 width=8)
- Output:["_col0"],aggregations:["count()"]
- Filter Operator [FIL_4] (rows=1 width=4)
- predicate:false
- Select Operator [SEL_3] (rows=500 width=4)
- TableScan [TS_2] (rows=500 width=10)
- default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE
+ SHUFFLE [RS_18]
+ Group By Operator [GBY_7] (rows=1 width=16)
+ Output:["_col0","_col1"],aggregations:["count(VALUE._col0)","count(VALUE._col1)"]
+ <-Map 5 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_6]
+ Group By Operator [GBY_5] (rows=1 width=16)
+ Output:["_col0","_col1"],aggregations:["count()","count(key)"]
+ Filter Operator [FIL_29] (rows=166 width=87)
+ predicate:(key > '2')
+ TableScan [TS_2] (rows=500 width=87)
+ default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
+ <-Reducer 8 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_21]
+ PartitionCols:_col0
+ Group By Operator [GBY_15] (rows=69 width=91)
+ Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
+ <-Map 7 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_14]
+ PartitionCols:_col0, _col1
+ Group By Operator [GBY_13] (rows=69 width=91)
+ Output:["_col0","_col1"],keys:_col0, true
+ Select Operator [SEL_11] (rows=166 width=87)
+ Output:["_col0"]
+ Filter Operator [FIL_30] (rows=166 width=87)
+ predicate:(key > '2')
+ TableScan [TS_9] (rows=500 width=87)
+ default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
PREHOOK: query: explain select p_mfgr, b.p_name, p_size
from part b
@@ -2366,58 +2528,128 @@ POSTHOOK: type: QUERY
Plan optimized by CBO.
Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
-Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
-Reducer 5 <- Map 4 (SIMPLE_EDGE)
+Reducer 10 <- Map 9 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE)
+Reducer 11 <- Reducer 10 (SIMPLE_EDGE)
+Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE)
+Reducer 14 <- Map 13 (SIMPLE_EDGE)
+Reducer 16 <- Map 15 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 5 <- Map 4 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
+Reducer 8 <- Map 7 (SIMPLE_EDGE)
Stage-0
Fetch Operator
limit:-1
Stage-1
Reducer 3 llap
- File Output Operator [FS_22]
- Select Operator [SEL_21] (rows=1 width=223)
+ File Output Operator [FS_54]
+ Select Operator [SEL_53] (rows=13 width=223)
Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_20] (rows=1 width=344)
- predicate:_col4 is null
- Merge Join Operator [MERGEJOIN_27] (rows=26 width=344)
- Conds:RS_17._col0, _col1=RS_18._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4"]
- <-Map 6 [SIMPLE_EDGE] llap
- SHUFFLE [RS_18]
- PartitionCols:_col0, _col1
- Select Operator [SEL_13] (rows=8 width=219)
- Output:["_col0","_col1"]
- Filter Operator [FIL_25] (rows=8 width=223)
- predicate:(p_size < 10)
- TableScan [TS_11] (rows=26 width=223)
- default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name","p_mfgr","p_size"]
+ Filter Operator [FIL_52] (rows=13 width=243)
+ predicate:CASE WHEN ((_col4 = 0)) THEN (true) WHEN (_col4 is null) THEN (true) WHEN (_col8 is not null) THEN (false) WHEN (_col0 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (false) ELSE (true) END
+ Merge Join Operator [MERGEJOIN_76] (rows=26 width=243)
+ Conds:RS_49._col0, _col1=RS_50._col3, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5","_col8"]
+ <-Reducer 12 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_50]
+ PartitionCols:_col3, _col1
+ Merge Join Operator [MERGEJOIN_75] (rows=2 width=223)
+ Conds:RS_42._col0=RS_43._col0(Inner),Output:["_col1","_col2","_col3"]
+ <-Reducer 11 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_42]
+ PartitionCols:_col0
+ Select Operator [SEL_35] (rows=4 width=223)
+ Output:["_col0","_col1","_col2"]
+ Group By Operator [GBY_34] (rows=4 width=219)
+ Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
+ <-Reducer 10 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_33]
+ PartitionCols:_col0, _col1
+ Group By Operator [GBY_32] (rows=4 width=219)
+ Output:["_col0","_col1"],keys:_col1, _col2
+ Merge Join Operator [MERGEJOIN_74] (rows=8 width=219)
+ Conds:RS_28._col0=RS_29._col0(Inner),Output:["_col1","_col2"]
+ <-Map 9 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_28]
+ PartitionCols:_col0
+ Select Operator [SEL_21] (rows=8 width=219)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_69] (rows=8 width=223)
+ predicate:(p_size < 10)
+ TableScan [TS_19] (rows=26 width=223)
+ default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name","p_mfgr","p_size"]
+ <-Reducer 14 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_29]
+ PartitionCols:_col0
+ Group By Operator [GBY_26] (rows=5 width=98)
+ Output:["_col0"],keys:KEY._col0
+ <-Map 13 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_25]
+ PartitionCols:_col0
+ Group By Operator [GBY_24] (rows=5 width=98)
+ Output:["_col0"],keys:p_mfgr
+ TableScan [TS_22] (rows=26 width=98)
+ default@part,b,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr"]
+ <-Reducer 16 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_43]
+ PartitionCols:_col0
+ Group By Operator [GBY_40] (rows=13 width=121)
+ Output:["_col0"],keys:KEY._col0
+ <-Map 15 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_39]
+ PartitionCols:_col0
+ Group By Operator [GBY_38] (rows=13 width=121)
+ Output:["_col0"],keys:p_name
+ TableScan [TS_36] (rows=26 width=121)
+ default@part,b,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name"]
<-Reducer 2 [SIMPLE_EDGE] llap
- SHUFFLE [RS_17]
+ SHUFFLE [RS_49]
PartitionCols:_col0, _col1
- Merge Join Operator [MERGEJOIN_26] (rows=26 width=223)
- Conds:(Inner),Output:["_col0","_col1","_col2"]
+ Merge Join Operator [MERGEJOIN_73] (rows=26 width=239)
+ Conds:RS_46._col1=RS_47._col0(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5"]
<-Map 1 [SIMPLE_EDGE] llap
- SHUFFLE [RS_14]
+ SHUFFLE [RS_46]
+ PartitionCols:_col1
Select Operator [SEL_1] (rows=26 width=223)
Output:["_col0","_col1","_col2"]
TableScan [TS_0] (rows=26 width=223)
default@part,b,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name","p_mfgr","p_size"]
- <-Reducer 5 [SIMPLE_EDGE] llap
- SHUFFLE [RS_15]
- Select Operator [SEL_10] (rows=1 width=8)
- Filter Operator [FIL_9] (rows=1 width=8)
- predicate:(_col0 = 0)
- Group By Operator [GBY_7] (rows=1 width=8)
- Output:["_col0"],aggregations:["count(VALUE._col0)"]
- <-Map 4 [SIMPLE_EDGE] llap
- SHUFFLE [RS_6]
- Group By Operator [GBY_5] (rows=1 width=8)
- Output:["_col0"],aggregations:["count()"]
- Select Operator [SEL_4] (rows=1 width=223)
- Filter Operator [FIL_24] (rows=1 width=223)
- predicate:((p_size < 10) and (p_name is null or p_mfgr is null))
- TableScan [TS_2] (rows=26 width=223)
- default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name","p_mfgr","p_size"]
+ <-Reducer 6 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_47]
+ PartitionCols:_col0
+ Group By Operator [GBY_17] (rows=2 width=114)
+ Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0
+ <-Reducer 5 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_16]
+ PartitionCols:_col0
+ Group By Operator [GBY_15] (rows=2 width=114)
+ Output:["_col0","_col1","_col2"],aggregations:["count()","count(_col1)"],keys:_col2
+ Select Operator [SEL_14] (rows=8 width=219)
+ Output:["_col2","_col1"]
+ Merge Join Operator [MERGEJOIN_72] (rows=8 width=219)
+ Conds:RS_11._col0=RS_12._col0(Inner),Output:["_col1","_col2"]
+ <-Map 4 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_11]
+ PartitionCols:_col0
+ Select Operator [SEL_4] (rows=8 width=219)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_67] (rows=8 width=223)
+ predicate:(p_size < 10)
+ TableScan [TS_2] (rows=26 width=223)
+ default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name","p_mfgr","p_size"]
+ <-Reducer 8 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_12]
+ PartitionCols:_col0
+ Group By Operator [GBY_9] (rows=5 width=98)
+ Output:["_col0"],keys:KEY._col0
+ <-Map 7 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_8]
+ PartitionCols:_col0
+ Group By Operator [GBY_7] (rows=5 width=98)
+ Output:["_col0"],keys:p_mfgr
+ TableScan [TS_5] (rows=26 width=98)
+ default@part,b,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr"]
PREHOOK: query: explain select p_name, p_size
from
@@ -2439,71 +2671,73 @@ Plan optimized by CBO.
Vertex dependency in root stage
Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
-Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
Reducer 6 <- Map 5 (SIMPLE_EDGE)
Reducer 8 <- Map 7 (SIMPLE_EDGE)
+Reducer 9 <- Reducer 8 (SIMPLE_EDGE)
Stage-0
Fetch Operator
limit:-1
Stage-1
Reducer 4 llap
- File Output Operator [FS_35]
- Select Operator [SEL_34] (rows=1 width=125)
+ File Output Operator [FS_36]
+ Select Operator [SEL_35] (rows=13 width=125)
Output:["_col0","_col1"]
<-Reducer 3 [SIMPLE_EDGE] llap
- SHUFFLE [RS_33]
- Select Operator [SEL_32] (rows=1 width=125)
+ SHUFFLE [RS_34]
+ Select Operator [SEL_33] (rows=13 width=125)
Output:["_col0","_col1"]
- Filter Operator [FIL_31] (rows=1 width=133)
- predicate:_col3 is null
- Merge Join Operator [MERGEJOIN_41] (rows=26 width=133)
- Conds:RS_28.UDFToDouble(_col1)=RS_29._col0(Left Outer),Output:["_col0","_col1","_col3"]
+ Filter Operator [FIL_32] (rows=13 width=145)
+ predicate:(not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) WHEN (_col1 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END)
+ Merge Join Operator [MERGEJOIN_42] (rows=26 width=145)
+ Conds:RS_29.UDFToDouble(_col1)=RS_30._col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5"]
<-Reducer 2 [SIMPLE_EDGE] llap
- SHUFFLE [RS_28]
+ SHUFFLE [RS_29]
PartitionCols:UDFToDouble(_col1)
- Merge Join Operator [MERGEJOIN_40] (rows=26 width=125)
- Conds:(Inner),Output:["_col0","_col1"]
+ Merge Join Operator [MERGEJOIN_41] (rows=26 width=141)
+ Conds:(Inner),Output:["_col0","_col1","_col2","_col3"]
<-Map 1 [SIMPLE_EDGE] llap
- SHUFFLE [RS_25]
+ SHUFFLE [RS_26]
Select Operator [SEL_1] (rows=26 width=125)
Output:["_col0","_col1"]
TableScan [TS_0] (rows=26 width=125)
default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name","p_size"]
<-Reducer 6 [SIMPLE_EDGE] llap
- SHUFFLE [RS_26]
- Select Operator [SEL_17] (rows=1 width=8)
- Filter Operator [FIL_16] (rows=1 width=8)
- predicate:(_col0 = 0)
- Group By Operator [GBY_14] (rows=1 width=8)
- Output:["_col0"],aggregations:["count()"]
- Select Operator [SEL_10] (rows=1 width=8)
- Filter Operator [FIL_9] (rows=1 width=8)
- predicate:_col0 is null
- Group By Operator [GBY_7] (rows=1 width=8)
- Output:["_col0"],aggregations:["avg(VALUE._col0)"]
- <-Map 5 [SIMPLE_EDGE] llap
- SHUFFLE [RS_6]
- Group By Operator [GBY_5] (rows=1 width=76)
- Output:["_col0"],aggregations:["avg(p_size)"]
- Filter Operator [FIL_37] (rows=8 width=4)
- predicate:(p_size < 10)
- TableScan [TS_2] (rows=26 width=4)
- default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_size"]
- <-Reducer 8 [SIMPLE_EDGE] llap
- SHUFFLE [RS_29]
+ SHUFFLE [RS_27]
+ Group By Operator [GBY_12] (rows=1 width=16)
+ Output:["_col0","_col1"],aggregations:["count()","count(_col0)"]
+ Group By Operator [GBY_7] (rows=1 width=8)
+ Output:["_col0"],aggregations:["avg(VALUE._col0)"]
+ <-Map 5 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_6]
+ Group By Operator [GBY_5] (rows=1 width=76)
+ Output:["_col0"],aggregations:["avg(p_size)"]
+ Filter Operator [FIL_38] (rows=8 width=4)
+ predicate:(p_size < 10)
+ TableScan [TS_2] (rows=26 width=4)
+ default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_size"]
+ <-Reducer 9 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_30]
PartitionCols:_col0
- Group By Operator [GBY_23] (rows=1 width=8)
- Output:["_col0"],aggregations:["avg(VALUE._col0)"]
- <-Map 7 [SIMPLE_EDGE] llap
- SHUFFLE [RS_22]
- Group By Operator [GBY_21] (rows=1 width=76)
- Output:["_col0"],aggregations:["avg(p_size)"]
- Filter Operator [FIL_39] (rows=8 width=4)
- predicate:(p_size < 10)
- TableScan [TS_18] (rows=26 width=4)
- default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_size"]
+ Group By Operator [GBY_24] (rows=1 width=12)
+ Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
+ <-Reducer 8 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_23]
+ PartitionCols:_col0, _col1
+ Group By Operator [GBY_22] (rows=1 width=12)
+ Output:["_col0","_col1"],keys:_col0, true
+ Group By Operator [GBY_19] (rows=1 width=8)
+ Output:["_col0"],aggregations:["avg(VALUE._col0)"]
+ <-Map 7 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_18]
+ Group By Operator [GBY_17] (rows=1 width=76)
+ Output:["_col0"],aggregations:["avg(p_size)"]
+ Filter Operator [FIL_40] (rows=8 width=4)
+ predicate:(p_size < 10)
+ TableScan [TS_14] (rows=26 width=4)
+ default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_size"]
PREHOOK: query: explain select b.p_mfgr, min(p_retailprice)
from part b
@@ -2528,53 +2762,119 @@ POSTHOOK: type: QUERY
Plan optimized by CBO.
Vertex dependency in root stage
-Reducer 10 <- Map 9 (SIMPLE_EDGE)
+Reducer 11 <- Map 10 (SIMPLE_EDGE)
+Reducer 12 <- Reducer 11 (SIMPLE_EDGE)
+Reducer 14 <- Map 13 (SIMPLE_EDGE)
+Reducer 15 <- Reducer 14 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE)
+Reducer 16 <- Reducer 15 (SIMPLE_EDGE)
+Reducer 17 <- Reducer 16 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE)
+Reducer 19 <- Map 18 (SIMPLE_EDGE)
Reducer 2 <- Map 1 (SIMPLE_EDGE)
-Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
-Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
+Reducer 20 <- Reducer 19 (SIMPLE_EDGE)
+Reducer 22 <- Map 21 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 17 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
Reducer 7 <- Map 6 (SIMPLE_EDGE)
-Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
+Reducer 8 <- Reducer 12 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
+Reducer 9 <- Reducer 8 (SIMPLE_EDGE)
Stage-0
Fetch Operator
limit:-1
Stage-1
Reducer 5 llap
- File Output Operator [FS_37]
- Select Operator [SEL_36] (rows=1 width=106)
+ File Output Operator [FS_83]
+ Select Operator [SEL_82] (rows=2 width=106)
Output:["_col0","_col1"]
<-Reducer 4 [SIMPLE_EDGE] llap
- SHUFFLE [RS_35]
- Select Operator [SEL_34] (rows=1 width=106)
+ SHUFFLE [RS_81]
+ Select Operator [SEL_80] (rows=2 width=106)
Output:["_col0","_col1"]
- Filter Operator [FIL_33] (rows=1 width=204)
- predicate:_col3 is null
- Merge Join Operator [MERGEJOIN_42] (rows=5 width=204)
- Conds:RS_30._col0, _col1=RS_31._col0, _col1(Left Outer),Output:["_col0","_col1","_col3"]
- <-Reducer 10 [SIMPLE_EDGE] llap
- SHUFFLE [RS_31]
- PartitionCols:_col0, _col1
- Select Operator [SEL_26] (rows=1 width=106)
- Output:["_col0","_col1"]
- Filter Operator [FIL_39] (rows=1 width=114)
- predicate:((_col2 - _col1) > 600.0)
- Group By Operator [GBY_24] (rows=5 width=114)
- Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"],keys:KEY._col0
- <-Map 9 [SIMPLE_EDGE] llap
- SHUFFLE [RS_23]
- PartitionCols:_col0
- Group By Operator [GBY_22] (rows=5 width=114)
- Output:["_col0","_col1","_col2"],aggregations:["min(p_retailprice)","max(p_retailprice)"],keys:p_mfgr
- TableScan [TS_20] (rows=26 width=106)
- default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_retailprice"]
+ Filter Operator [FIL_79] (rows=2 width=126)
+ predicate:CASE WHEN ((_col3 = 0)) THEN (true) WHEN (_col3 is null) THEN (true) WHEN (_col7 is not null) THEN (false) WHEN (_col0 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (false) ELSE (true) END
+ Merge Join Operator [MERGEJOIN_108] (rows=5 width=126)
+ Conds:RS_76._col0, _col1=RS_77._col3, _col1(Left Outer),Output:["_col0","_col1","_col3","_col4","_col7"]
+ <-Reducer 17 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_77]
+ PartitionCols:_col3, _col1
+ Merge Join Operator [MERGEJOIN_107] (rows=1 width=110)
+ Conds:RS_69._col0=RS_70._col0(Inner),Output:["_col1","_col2","_col3"]
+ <-Reducer 16 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_69]
+ PartitionCols:_col0
+ Select Operator [SEL_57] (rows=1 width=110)
+ Output:["_col0","_col1","_col2"]
+ Group By Operator [GBY_56] (rows=1 width=106)
+ Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
+ <-Reducer 15 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_55]
+ PartitionCols:_col0, _col1
+ Group By Operator [GBY_54] (rows=1 width=106)
+ Output:["_col0","_col1"],keys:_col0, _col3
+ Merge Join Operator [MERGEJOIN_106] (rows=1 width=106)
+ Conds:RS_50._col1=RS_51._col0(Inner),Output:["_col0","_col3"]
+ <-Reducer 14 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_50]
+ PartitionCols:_col1
+ Select Operator [SEL_38] (rows=1 width=114)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_98] (rows=1 width=114)
+ predicate:((_col2 - _col1) > 600.0)
+ Group By Operator [GBY_36] (rows=5 width=114)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"],keys:KEY._col0
+ <-Map 13 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_35]
+ PartitionCols:_col0
+ Group By Operator [GBY_34] (rows=5 width=114)
+ Output:["_col0","_col1","_col2"],aggregations:["min(p_retailprice)","max(p_retailprice)"],keys:p_mfgr
+ TableScan [TS_32] (rows=26 width=106)
+ default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_retailprice"]
+ <-Reducer 20 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_51]
+ PartitionCols:_col0
+ Group By Operator [GBY_48] (rows=2 width=8)
+ Output:["_col0"],keys:KEY._col0
+ <-Reducer 19 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_47]
+ PartitionCols:_col0
+ Group By Operator [GBY_46] (rows=2 width=8)
+ Output:["_col0"],keys:_col1
+ Select Operator [SEL_103] (rows=5 width=8)
+ Output:["_col1"]
+ Group By Operator [GBY_43] (rows=5 width=106)
+ Output:["_col0","_col1"],aggregations:["min(VALUE._col0)"],keys:KEY._col0
+ <-Map 18 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_42]
+ PartitionCols:_col0
+ Group By Operator [GBY_41] (rows=5 width=106)
+ Output:["_col0","_col1"],aggregations:["min(p_retailprice)"],keys:p_mfgr
+ Select Operator [SEL_40] (rows=26 width=106)
+ Output:["p_mfgr","p_retailprice"]
+ TableScan [TS_39] (rows=26 width=106)
+ default@part,b,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_retailprice"]
+ <-Reducer 22 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_70]
+ PartitionCols:_col0
+ Group By Operator [GBY_67] (rows=5 width=98)
+ Output:["_col0"],keys:_col0
+ Group By Operator [GBY_62] (rows=5 width=98)
+ Output:["_col0"],keys:KEY._col0
+ <-Map 21 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_61]
+ PartitionCols:_col0
+ Group By Operator [GBY_60] (rows=5 width=98)
+ Output:["_col0"],keys:p_mfgr
+ TableScan [TS_58] (rows=26 width=98)
+ default@part,b,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr"]
<-Reducer 3 [SIMPLE_EDGE] llap
- SHUFFLE [RS_30]
+ SHUFFLE [RS_76]
PartitionCols:_col0, _col1
- Merge Join Operator [MERGEJOIN_41] (rows=5 width=106)
- Conds:(Inner),Output:["_col0","_col1"]
+ Merge Join Operator [MERGEJOIN_105] (rows=5 width=122)
+ Conds:RS_73._col1=RS_74._col0(Left Outer),Output:["_col0","_col1","_col3","_col4"]
<-Reducer 2 [SIMPLE_EDGE] llap
- SHUFFLE [RS_27]
+ SHUFFLE [RS_73]
+ PartitionCols:_col1
Group By Operator [GBY_4] (rows=5 width=106)
Output:["_col0","_col1"],aggregations:["min(VALUE._col0)"],keys:KEY._col0
<-Map 1 [SIMPLE_EDGE] llap
@@ -2586,31 +2886,61 @@ Stage-0
Output:["p_mfgr","p_retailprice"]
TableScan [TS_0] (rows=26 width=106)
default@part,b,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_retailprice"]
- <-Reducer 8 [SIMPLE_EDGE] llap
- SHUFFLE [RS_28]
- Select Operator [SEL_19] (rows=1 width=8)
- Filter Operator [FIL_18] (rows=1 width=8)
- predicate:(_col0 = 0)
- Group By Operator [GBY_16] (rows=1 width=8)
- Output:["_col0"],aggregations:["count(VALUE._col0)"]
- <-Reducer 7 [SIMPLE_EDGE] llap
- SHUFFLE [RS_15]
- Group By Operator [GBY_14] (rows=1 width=8)
- Output:["_col0"],aggregations:["count()"]
- Select Operator [SEL_12] (rows=1 width=114)
- Filter Operator [FIL_11] (rows=1 width=114)
- predicate:(((_col2 - _col1) > 600.0) and (_col0 is null or _col1 is null))
- Group By Operator [GBY_10] (rows=5 width=114)
- Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"],keys:KEY._col0
- <-Map 6 [SIMPLE_EDGE] llap
- SHUFFLE [RS_9]
- PartitionCols:_col0
- Group By Operator [GBY_8] (rows=5 width=114)
- Output:["_col0","_col1","_col2"],aggregations:["min(p_retailprice)","max(p_retailprice)"],keys:p_mfgr
- Select Operator [SEL_7] (rows=26 width=106)
- Output:["p_mfgr","p_retailprice"]
- TableScan [TS_6] (rows=26 width=106)
- default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_retailprice"]
+ <-Reducer 9 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_74]
+ PartitionCols:_col0
+ Group By Operator [GBY_30] (rows=1 width=24)
+ Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0
+ <-Reducer 8 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_29]
+ PartitionCols:_col0
+ Group By Operator [GBY_28] (rows=1 width=24)
+ Output:["_col0","_col1","_col2"],aggregations:["count()","count(_col0)"],keys:_col3
+ Select Operator [SEL_27] (rows=1 width=106)
+ Output:["_col3","_col0"]
+ Merge Join Operator [MERGEJOIN_104] (rows=1 width=106)
+ Conds:RS_24._col1=RS_25._col0(Inner),Output:["_col0","_col3"]
+ <-Reducer 12 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_25]
+ PartitionCols:_col0
+ Group By Operator [GBY_22] (rows=2 width=8)
+ Output:["_col0"],keys:KEY._col0
+ <-Reducer 11 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_21]
+ PartitionCols:_col0
+ Group By Operator [GBY_20] (rows=2 width=8)
+ Output:["_col0"],keys:_col1
+ Select Operator [SEL_102] (rows=5 width=8)
+ Output:["_col1"]
+ Group By Operator [GBY_17] (rows=5 width=106)
+ Output:["_col0","_col1"],aggregations:["min(VALUE._col0)"],keys:KEY._col0
+ <-Map 10 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_16]
+ PartitionCols:_col0
+ Group By Operator [GBY_15] (rows=5 width=106)
+ Output:["_col0","_col1"],aggregations:["min(p_retailprice)"],keys:p_mfgr
+ Select Operator [SEL_14] (rows=26 width=106)
+ Output:["p_mfgr","p_retailprice"]
+ TableScan [TS_13] (rows=26 width=106)
+ default@part,b,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_retailprice"]
+ <-Reducer 7 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_24]
+ PartitionCols:_col1
+ Select Operator [SEL_12] (rows=1 width=114)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_96] (rows=1 width=114)
+ predicate:((_col2 - _col1) > 600.0)
+ Group By Operator [GBY_10] (rows=5 width=114)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"],keys:KEY._col0
+ <-Map 6 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_9]
+ PartitionCols:_col0
+ Group By Operator [GBY_8] (rows=5 width=114)
+ Output:["_col0","_col1","_col2"],aggregations:["min(p_retailprice)","max(p_retailprice)"],keys:p_mfgr
+ Select Operator [SEL_7] (rows=26 width=106)
+ Output:["p_mfgr","p_retailprice"]
+ TableScan [TS_6] (rows=26 width=106)
+ default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_retailprice"]
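The rewritten plan above carries two aggregates for the NOT IN subquery, count() and count(_col0): count(col) skips nulls while count(*) does not, so comparing the two is how the rewrite detects nulls on the subquery side. A minimal Python sketch of that check (illustration only, not Hive code):

```python
# count(*) counts every row; count(col) counts only non-null values,
# so count(col) < count(*) exactly when the column contains a null.
rows = [10, None, 30]
count_star = len(rows)
count_col = sum(1 for r in rows if r is not None)
subquery_has_nulls = count_col < count_star
assert subquery_has_nulls
```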
PREHOOK: query: explain select count(c_int) over(), sum(c_float) over(), max(c_int) over(), min(c_int) over(), row_number() over(), rank() over(), dense_rank() over(), percent_rank() over(), lead(c_int, 2, c_int) over(), lag(c_float, 2, c_float) over() from cbo_t1
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientpositive/llap/lineage3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/lineage3.q.out b/ql/src/test/results/clientpositive/llap/lineage3.q.out
index 1a532da..72a9344 100644
--- a/ql/src/test/results/clientpositive/llap/lineage3.q.out
+++ b/ql/src/test/results/clientpositive/llap/lineage3.q.out
@@ -166,7 +166,7 @@ where key in (select key+18 from src1) order by key
PREHOOK: type: QUERY
PREHOOK: Input: default@src1
#### A masked pattern was here ####
-{"version":"1.0","engine":"tez","database":"default","hash":"8b9d63653e36ecf4dd425d3cc3de9199","queryText":"select key, value from src1\nwhere key in (select key+18 from src1) order by key","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"UDFToDouble(src1.key) is not null","edgeType":"PREDICATE"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(src1.key) = (UDFToDouble(src1.key) + 18.0))","edgeType":"PREDICATE"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(src1.key) + 18.0) is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"key"},{"id":1,"vertexType":"COLUMN","vertexId":"value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"}]}
+{"version":"1.0","engine":"tez","database":"default","hash":"8b9d63653e36ecf4dd425d3cc3de9199","queryText":"select key, value from src1\nwhere key in (select key+18 from src1) order by key","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(src1.key) = (UDFToDouble(src1.key) + 18.0))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"key"},{"id":1,"vertexType":"COLUMN","vertexId":"value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"}]}
146 val_146
273 val_273
PREHOOK: query: select * from src1 a
@@ -178,15 +178,15 @@ PREHOOK: type: QUERY
PREHOOK: Input: default@alltypesorc
PREHOOK: Input: default@src1
#### A masked pattern was here ####
-{"version":"1.0","engine":"tez","database":"default","hash":"8bf193b0658183be94e2428a79d91d10","queryText":"select * from src1 a\nwhere exists\n (select cint from alltypesorc b\n where a.key = b.ctinyint + 300)\nand key > 300","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(a.key) > 300.0)","edgeType":"PREDICATE"},{"sources":[2,4],"targets":[0,1],"expression":"(UDFToDouble(a.key) = UDFToDouble((UDFToInteger(b.ctinyint) + 300)))","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1],"expression":"UDFToDouble((UDFToInteger(b.ctinyint) + 300)) is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"a.key"},{"id":1,"vertexType":"COLUMN","vertexId":"a.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"}]}
+{"version":"1.0","engine":"tez","database":"default","hash":"8bf193b0658183be94e2428a79d91d10","queryText":"select * from src1 a\nwhere exists\n (select cint from alltypesorc b\n where a.key = b.ctinyint + 300)\nand key > 300","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(a.key) > 300.0)","edgeType":"PREDICATE"},{"sources":[2],"targets":[0,1],"expression":"(a.key = a.key)","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(UDFToDouble((UDFToInteger(b.ctinyint) + 300)) = UDFToDouble(a.key))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"a.key"},{"id":1,"vertexType":"COLUMN","vertexId":"a.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"}]}
311 val_311
-Warning: Shuffle Join MERGEJOIN[27][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: select key, value from src1
where key not in (select key+18 from src1) order by key
PREHOOK: type: QUERY
PREHOOK: Input: default@src1
#### A masked pattern was here ####
-{"version":"1.0","engine":"tez","database":"default","hash":"9b488fe1d7cf018aad3825173808cd36","queryText":"select key, value from src1\nwhere key not in (select key+18 from src1) order by key","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(src1.key) + 18.0) is null","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1],"expression":"(count(*) = 0)","edgeType":"PREDICATE"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(src1.key) = (UDFToDouble(src1.key) + 18.0))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"key"},{"id":1,"vertexType":"COLUMN","vertexId":"value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"TABLE","vertexId":"default.src1"}]}
+{"version":"1.0","engine":"tez","database":"default","hash":"9b488fe1d7cf018aad3825173808cd36","queryText":"select key, value from src1\nwhere key not in (select key+18 from src1) order by key","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(src1.key) = (UDFToDouble(src1.key) + 18.0))","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(not CASE WHEN ((count(*) = 0)) THEN (false) WHEN (i is not null) THEN (true) WHEN (src1.key is null) THEN (null) WHEN ((count((UDFToDouble(src1.key) + 18.0)) < count(*))) THEN (true) ELSE (false) END)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"key"},{"id":1,"vertexType":"COLUMN","vertexId":"value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"TABLE","vertexId":"default.src1"}]}
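The new PREDICATE edge above encodes SQL's three-valued NOT IN semantics as a CASE expression: an empty subquery yields true, a definite match yields false, and a null on either side yields unknown, which a WHERE clause treats as false. A small Python model of that logic (hypothetical helper, for illustration only):

```python
def not_in(value, subquery_rows):
    """Three-valued NOT IN: returns True, False, or None (unknown)."""
    if not subquery_rows:      # empty subquery: NOT IN is true
        return True
    if value is None:          # null outer value: unknown
        return None
    if value in [r for r in subquery_rows if r is not None]:
        return False           # a definite match: NOT IN is false
    if any(r is None for r in subquery_rows):
        return None            # nulls present, no match: unknown
    return True

# In a WHERE clause, unknown (None) filters the row out, like False.
assert not_in(5, [1, None]) is None
```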
PREHOOK: query: select * from src1 a
where not exists
(select cint from alltypesorc b
@@ -196,7 +196,7 @@ PREHOOK: type: QUERY
PREHOOK: Input: default@alltypesorc
PREHOOK: Input: default@src1
#### A masked pattern was here ####
-{"version":"1.0","engine":"tez","database":"default","hash":"53191056e05af9080a30de853e8cea9c","queryText":"select * from src1 a\nwhere not exists\n (select cint from alltypesorc b\n where a.key = b.ctinyint + 300)\nand key > 300","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(a.key) > 300.0)","edgeType":"PREDICATE"},{"sources":[2,4],"targets":[0,1],"expression":"(UDFToDouble(a.key) = UDFToDouble((UDFToInteger(b.ctinyint) + 300)))","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1],"expression":"(UDFToInteger(b.ctinyint) + 300) is null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"a.key"},{"id":1,"vertexType":"COLUMN","vertexId":"a.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"}]}
+{"version":"1.0","engine":"tez","database":"default","hash":"53191056e05af9080a30de853e8cea9c","queryText":"select * from src1 a\nwhere not exists\n (select cint from alltypesorc b\n where a.key = b.ctinyint + 300)\nand key > 300","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(a.key) > 300.0)","edgeType":"PREDICATE"},{"sources":[2],"targets":[0,1],"expression":"(a.key = a.key)","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(UDFToDouble((UDFToInteger(b.ctinyint) + 300)) = UDFToDouble(a.key))","edgeType":"PREDICATE"},{"sources":[],"targets":[0,1],"expression":"true is null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"a.key"},{"id":1,"vertexType":"COLUMN","vertexId":"a.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"}]}
369
401 val_401
406 val_406
[04/21] hive git commit: HIVE-15192 : Use Calcite to de-correlate and plan subqueries (Vineet Garg via Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientpositive/subquery_notexists_having.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/subquery_notexists_having.q.out b/ql/src/test/results/clientpositive/subquery_notexists_having.q.out
index 9349f2d..5948f9a 100644
--- a/ql/src/test/results/clientpositive/subquery_notexists_having.q.out
+++ b/ql/src/test/results/clientpositive/subquery_notexists_having.q.out
@@ -22,7 +22,10 @@ having not exists
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-1, Stage-4
+ Stage-5 is a root stage
+ Stage-3 depends on stages: Stage-5
+ Stage-4 depends on stages: Stage-3
Stage-0 depends on stages: Stage-2
STAGE PLANS:
@@ -69,31 +72,23 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
TableScan
- alias: a
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (value > 'val_12') (type: boolean)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: value (type: string), key (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: string), _col0 (type: string)
- sort order: ++
- Map-reduce partition columns: _col1 (type: string), _col0 (type: string)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: boolean)
Reduce Operator Tree:
Join Operator
condition map:
Left Outer Join0 to 1
keys:
0 _col0 (type: string), _col1 (type: string)
- 1 _col1 (type: string), _col0 (type: string)
- outputColumnNames: _col0, _col1, _col3
+ 1 _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1, _col4
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: _col3 is null (type: boolean)
+ predicate: _col4 is null (type: boolean)
Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: string)
@@ -107,6 +102,115 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: key (type: string), value (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string)
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (value > 'val_12') (type: boolean)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string), _col1 (type: string)
+ 1 _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col2, _col3
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col2 (type: string), _col3 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), true (type: boolean)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
Stage: Stage-0
Fetch Operator
limit: -1
@@ -173,8 +277,10 @@ having not exists
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1, Stage-3
- Stage-3 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-4
+ Stage-5 is a root stage
+ Stage-3 depends on stages: Stage-5
+ Stage-4 depends on stages: Stage-3
Stage-0 depends on stages: Stage-2
STAGE PLANS:
@@ -227,17 +333,18 @@ STAGE PLANS:
value expressions: _col0 (type: string)
TableScan
Reduce Output Operator
- key expressions: _col1 (type: string)
+ key expressions: _col0 (type: string)
sort order: +
- Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 45 Data size: 479 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: boolean)
Reduce Operator Tree:
Join Operator
condition map:
Left Outer Join0 to 1
keys:
0 _col1 (type: string)
- 1 _col1 (type: string)
+ 1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col3
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Filter Operator
@@ -255,6 +362,48 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string), key (type: string)
+ outputColumnNames: value, key
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: value (type: string), key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: string)
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
Stage: Stage-3
Map Reduce
Map Operator Tree:
@@ -265,35 +414,80 @@ STAGE PLANS:
predicate: (value > 'val_12') (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: value (type: string), key (type: string)
- outputColumnNames: value, key
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: value (type: string), key (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: string), _col0 (type: string)
+ outputColumnNames: _col2, _col0
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col2 (type: string), _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string)
outputColumnNames: _col1
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: string)
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 45 Data size: 479 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), true (type: boolean)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 45 Data size: 479 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Fetch Operator
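The rewritten plans in this file implement NOT EXISTS as a left outer join against the de-duplicated correlated keys extended with a constant true marker column, keeping only rows whose marker comes out null. A rough Python sketch of that anti-join (hypothetical names, illustration only, not Hive code):

```python
def anti_join(outer_keys, subquery_keys):
    """Model NOT EXISTS: left outer join the outer rows against the
    distinct subquery keys marked with a constant true, then keep rows
    whose marker stayed null (i.e. that found no match)."""
    marker = {k: True for k in subquery_keys}  # distinct keys -> true
    return [k for k in outer_keys if marker.get(k) is None]

assert anti_join(['a', 'b', 'c'], ['b']) == ['a', 'c']
```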
[19/21] hive git commit: HIVE-15192 : Use Calcite to de-correlate and plan subqueries (Vineet Garg via Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java
new file mode 100644
index 0000000..a373cdd
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java
@@ -0,0 +1,3007 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.linq4j.Ord;
+import org.apache.calcite.linq4j.function.Function2;
+import org.apache.calcite.plan.Context;
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptCostImpl;
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.hep.HepPlanner;
+import org.apache.calcite.plan.hep.HepProgram;
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.rel.BiRel;
+import org.apache.calcite.rel.RelCollation;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.AggregateCall;
+import org.apache.calcite.rel.core.Correlate;
+import org.apache.calcite.rel.core.CorrelationId;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.core.RelFactories;
+import org.apache.calcite.rel.core.Sort;
+import org.apache.calcite.rel.core.Values;
+import org.apache.calcite.rel.logical.LogicalAggregate;
+import org.apache.calcite.rel.logical.LogicalCorrelate;
+import org.apache.calcite.rel.logical.LogicalFilter;
+import org.apache.calcite.rel.logical.LogicalJoin;
+import org.apache.calcite.rel.logical.LogicalProject;
+import org.apache.calcite.rel.metadata.RelMdUtil;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rel.rules.FilterCorrelateRule;
+import org.apache.calcite.rel.rules.FilterJoinRule;
+import org.apache.calcite.rel.rules.FilterProjectTransposeRule;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexCorrelVariable;
+import org.apache.calcite.rex.RexFieldAccess;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexShuttle;
+import org.apache.calcite.rex.RexSubQuery;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.rex.RexVisitorImpl;
+import org.apache.calcite.sql.SqlFunction;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlOperator;
+import org.apache.calcite.sql.fun.SqlCountAggFunction;
+import org.apache.calcite.sql.fun.SqlSingleValueAggFunction;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.calcite.tools.RelBuilder;
+import org.apache.calcite.util.Bug;
+import org.apache.calcite.util.Holder;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.Litmus;
+import org.apache.calcite.util.Pair;
+import org.apache.calcite.util.ReflectUtil;
+import org.apache.calcite.util.ReflectiveVisitor;
+import org.apache.calcite.util.Stacks;
+import org.apache.calcite.util.Util;
+import org.apache.calcite.util.mapping.Mappings;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
+import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Preconditions;
+import com.google.common.base.Supplier;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.ImmutableSortedMap;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Multimap;
+import com.google.common.collect.Multimaps;
+import com.google.common.collect.Sets;
+import com.google.common.collect.SortedSetMultimap;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelShuttleImpl;
+
+import java.math.BigDecimal;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.NavigableMap;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+/**
+ * NOTE: this whole logic is replicated from Calcite's RelDecorrelator
+ * and is extended to make it suitable for Hive.
+ * TODO:
+ * We should get rid of this and replace it with Calcite's RelDecorrelator
+ * once that works with Join, Project, etc. instead of LogicalJoin, LogicalProject.
+ * We also need CALCITE-1511 to be fixed.
+ *
+ * RelDecorrelator replaces all correlated expressions (corExp) in a relational
+ * expression (RelNode) tree with non-correlated expressions that are produced
+ * from joining the RelNode that produces the corExp with the RelNode that
+ * references it.
+ *
+ * <p>TODO:</p>
+ * <ul>
+ * <li>replace {@code CorelMap} constructor parameter with a RelNode</li>
+ * <li>make {@link #currentRel} immutable (would require a fresh
+ * RelDecorrelator for each node being decorrelated)</li>
+ * <li>make fields of {@code CorelMap} immutable</li>
+ * <li>make sub-class rules static, and have them create their own
+ * de-correlator</li>
+ * </ul>
+ */
+public class HiveRelDecorrelator implements ReflectiveVisitor {
+ //~ Static fields/initializers ---------------------------------------------
+
+ protected static final Logger LOG = LoggerFactory.getLogger(
+ HiveRelDecorrelator.class);
+
+ //~ Instance fields --------------------------------------------------------
+
+ private final RelBuilder relBuilder;
+
+ // map built during translation
+ private CorelMap cm;
+
+ private final ReflectUtil.MethodDispatcher<Frame> dispatcher =
+ ReflectUtil.createMethodDispatcher(Frame.class, this, "decorrelateRel",
+ RelNode.class);
+
+ private final RexBuilder rexBuilder;
+
+ // The rel which is being visited
+ private RelNode currentRel;
+
+ private final Context context;
+
+ /** Map, built during decorrelation, from each rel to all the newly created
+ * correlated variables in its output, and from old input positions to new
+ * input positions. This is from the viewpoint of the parent rel of a new rel. */
+ private final Map<RelNode, Frame> map = new HashMap<>();
+
+ private final HashSet<LogicalCorrelate> generatedCorRels = Sets.newHashSet();
+
+ //~ Constructors -----------------------------------------------------------
+
+ private HiveRelDecorrelator(
+ RelOptCluster cluster,
+ CorelMap cm,
+ Context context) {
+ this.cm = cm;
+ this.rexBuilder = cluster.getRexBuilder();
+ this.context = context;
+ relBuilder = RelFactories.LOGICAL_BUILDER.create(cluster, null);
+ }
+
+ //~ Methods ----------------------------------------------------------------
+
+ /** Decorrelates a query.
+ *
+ * <p>This is the main entry point to {@code RelDecorrelator}.
+ *
+ * @param rootRel Root node of the query
+ *
+ * @return Equivalent query with all
+ * {@link org.apache.calcite.rel.logical.LogicalCorrelate} instances removed
+ */
+ public static RelNode decorrelateQuery(RelNode rootRel) {
+ final CorelMap corelMap = new CorelMapBuilder().build(rootRel);
+ if (!corelMap.hasCorrelation()) {
+ return rootRel;
+ }
+
+ final RelOptCluster cluster = rootRel.getCluster();
+ final HiveRelDecorrelator decorrelator =
+ new HiveRelDecorrelator(cluster, corelMap,
+ cluster.getPlanner().getContext());
+
+ RelNode newRootRel = decorrelator.removeCorrelationViaRule(rootRel);
+
+ if (!decorrelator.cm.mapCorVarToCorRel.isEmpty()) {
+ newRootRel = decorrelator.decorrelate(newRootRel);
+ }
+
+ return newRootRel;
+ }
+
+ private void setCurrent(RelNode root, LogicalCorrelate corRel) {
+ currentRel = corRel;
+ if (corRel != null) {
+ cm = new CorelMapBuilder().build(Util.first(root, corRel));
+ }
+ }
+
+ private RelNode decorrelate(RelNode root) {
+ // first adjust count() expression if any
+ HepProgram program = HepProgram.builder()
+ .addRuleInstance(new AdjustProjectForCountAggregateRule(false))
+ .addRuleInstance(new AdjustProjectForCountAggregateRule(true))
+ .addRuleInstance(FilterJoinRule.FILTER_ON_JOIN)
+ .addRuleInstance(FilterProjectTransposeRule.INSTANCE)
+ .addRuleInstance(FilterCorrelateRule.INSTANCE)
+ .build();
+
+ HepPlanner planner = createPlanner(program);
+
+ planner.setRoot(root);
+ root = planner.findBestExp();
+
+ // Perform decorrelation.
+ map.clear();
+
+ final Frame frame = getInvoke(root, null);
+ if (frame != null) {
+ // has been rewritten; apply rules post-decorrelation
+ final HepProgram program2 = HepProgram.builder()
+ .addRuleInstance(FilterJoinRule.FILTER_ON_JOIN)
+ .addRuleInstance(FilterJoinRule.JOIN)
+ .build();
+
+ final HepPlanner planner2 = createPlanner(program2);
+ final RelNode newRoot = frame.r;
+ planner2.setRoot(newRoot);
+ return planner2.findBestExp();
+ }
+
+ return root;
+ }
+
+ private Function2<RelNode, RelNode, Void> createCopyHook() {
+ return new Function2<RelNode, RelNode, Void>() {
+ public Void apply(RelNode oldNode, RelNode newNode) {
+ if (cm.mapRefRelToCorVar.containsKey(oldNode)) {
+ cm.mapRefRelToCorVar.putAll(newNode,
+ cm.mapRefRelToCorVar.get(oldNode));
+ }
+ if (oldNode instanceof LogicalCorrelate
+ && newNode instanceof LogicalCorrelate) {
+ LogicalCorrelate oldCor = (LogicalCorrelate) oldNode;
+ CorrelationId c = oldCor.getCorrelationId();
+ if (cm.mapCorVarToCorRel.get(c) == oldNode) {
+ cm.mapCorVarToCorRel.put(c, newNode);
+ }
+
+ if (generatedCorRels.contains(oldNode)) {
+ generatedCorRels.add((LogicalCorrelate) newNode);
+ }
+ }
+ return null;
+ }
+ };
+ }
+
+ private HepPlanner createPlanner(HepProgram program) {
+ // Create a planner with a hook to update the mapping tables when a
+ // node is copied when it is registered.
+ return new HepPlanner(
+ program,
+ context,
+ true,
+ createCopyHook(),
+ RelOptCostImpl.FACTORY);
+ }
+
+ public RelNode removeCorrelationViaRule(RelNode root) {
+ HepProgram program = HepProgram.builder()
+ .addRuleInstance(new RemoveSingleAggregateRule())
+ .addRuleInstance(new RemoveCorrelationForScalarProjectRule())
+ .addRuleInstance(new RemoveCorrelationForScalarAggregateRule())
+ .build();
+
+ HepPlanner planner = createPlanner(program);
+
+ planner.setRoot(root);
+ return planner.findBestExp();
+ }
+
+ protected RexNode decorrelateExpr(RexNode exp) {
+ DecorrelateRexShuttle shuttle = new DecorrelateRexShuttle();
+ return exp.accept(shuttle);
+ }
+
+ protected RexNode removeCorrelationExpr(
+ RexNode exp,
+ boolean projectPulledAboveLeftCorrelator) {
+ RemoveCorrelationRexShuttle shuttle =
+ new RemoveCorrelationRexShuttle(rexBuilder,
+ projectPulledAboveLeftCorrelator, null, ImmutableSet.<Integer>of());
+ return exp.accept(shuttle);
+ }
+
+ protected RexNode removeCorrelationExpr(
+ RexNode exp,
+ boolean projectPulledAboveLeftCorrelator,
+ RexInputRef nullIndicator) {
+ RemoveCorrelationRexShuttle shuttle =
+ new RemoveCorrelationRexShuttle(rexBuilder,
+ projectPulledAboveLeftCorrelator, nullIndicator,
+ ImmutableSet.<Integer>of());
+ return exp.accept(shuttle);
+ }
+
+ protected RexNode removeCorrelationExpr(
+ RexNode exp,
+ boolean projectPulledAboveLeftCorrelator,
+ Set<Integer> isCount) {
+ RemoveCorrelationRexShuttle shuttle =
+ new RemoveCorrelationRexShuttle(rexBuilder,
+ projectPulledAboveLeftCorrelator, null, isCount);
+ return exp.accept(shuttle);
+ }
+
+ /** Fallback if none of the other {@code decorrelateRel} methods match. */
+ public Frame decorrelateRel(RelNode rel) {
+ RelNode newRel = rel.copy(rel.getTraitSet(), rel.getInputs());
+
+ if (rel.getInputs().size() > 0) {
+ List<RelNode> oldInputs = rel.getInputs();
+ List<RelNode> newInputs = Lists.newArrayList();
+ for (int i = 0; i < oldInputs.size(); ++i) {
+ final Frame frame = getInvoke(oldInputs.get(i), rel);
+ if (frame == null || !frame.corVarOutputPos.isEmpty()) {
+ // if input is not rewritten, or if it produces correlated
+ // variables, terminate rewrite
+ return null;
+ }
+ newInputs.add(frame.r);
+ newRel.replaceInput(i, frame.r);
+ }
+
+ if (!Util.equalShallow(oldInputs, newInputs)) {
+ newRel = rel.copy(rel.getTraitSet(), newInputs);
+ }
+ }
+
+ // the output position should not change since there are no corVars
+ // coming from below.
+ return register(rel, newRel, identityMap(rel.getRowType().getFieldCount()),
+ ImmutableSortedMap.<Correlation, Integer>of());
+ }
+
+ /**
+ * Rewrite Sort.
+ *
+ * @param rel Sort to be rewritten
+ */
+ public Frame decorrelateRel(HiveSortLimit rel) {
+ //
+ // Rewrite logic:
+ //
+ // 1. change the collations field to reference the new input.
+ //
+
+ // Sort itself should not reference cor vars.
+ assert !cm.mapRefRelToCorVar.containsKey(rel);
+
+ // Sort only references field positions in collations field.
+ // The collations field in the newRel now needs to refer to the
+ // new output positions in its input.
+ // Its output does not change the input ordering, so there's no
+ // need to call propagateExpr.
+
+ final RelNode oldInput = rel.getInput();
+ final Frame frame = getInvoke(oldInput, rel);
+ if (frame == null) {
+ // If input has not been rewritten, do not rewrite this rel.
+ return null;
+ }
+ final RelNode newInput = frame.r;
+
+ Mappings.TargetMapping mapping =
+ Mappings.target(
+ frame.oldToNewOutputPos,
+ oldInput.getRowType().getFieldCount(),
+ newInput.getRowType().getFieldCount());
+
+ RelCollation oldCollation = rel.getCollation();
+ RelCollation newCollation = RexUtil.apply(mapping, oldCollation);
+
+ final RelNode newSort = HiveSortLimit.create(newInput, newCollation, rel.offset, rel.fetch);
+
+ // Sort does not change input ordering
+ return register(rel, newSort, frame.oldToNewOutputPos,
+ frame.corVarOutputPos);
+ }
+
+ /**
+ * Rewrite Sort.
+ *
+ * @param rel Sort to be rewritten
+ */
+ public Frame decorrelateRel(Sort rel) {
+ //
+ // Rewrite logic:
+ //
+ // 1. change the collations field to reference the new input.
+ //
+
+ // Sort itself should not reference cor vars.
+ assert !cm.mapRefRelToCorVar.containsKey(rel);
+
+ // Sort only references field positions in collations field.
+ // The collations field in the newRel now needs to refer to the
+ // new output positions in its input.
+ // Its output does not change the input ordering, so there's no
+ // need to call propagateExpr.
+
+ final RelNode oldInput = rel.getInput();
+ final Frame frame = getInvoke(oldInput, rel);
+ if (frame == null) {
+ // If input has not been rewritten, do not rewrite this rel.
+ return null;
+ }
+ final RelNode newInput = frame.r;
+
+ Mappings.TargetMapping mapping =
+ Mappings.target(
+ frame.oldToNewOutputPos,
+ oldInput.getRowType().getFieldCount(),
+ newInput.getRowType().getFieldCount());
+
+ RelCollation oldCollation = rel.getCollation();
+ RelCollation newCollation = RexUtil.apply(mapping, oldCollation);
+
+ final RelNode newSort = HiveSortLimit.create(newInput, newCollation, rel.offset, rel.fetch);
+
+ // Sort does not change input ordering
+ return register(rel, newSort, frame.oldToNewOutputPos,
+ frame.corVarOutputPos);
+ }
+
+ /**
+ * Rewrites a {@link Values}.
+ *
+ * @param rel Values to be rewritten
+ */
+ public Frame decorrelateRel(Values rel) {
+ // There are no inputs, so rel does not need to be changed.
+ return null;
+ }
+
+ /**
+ * Rewrites a {@link LogicalAggregate}.
+ *
+ * @param rel Aggregate to rewrite
+ */
+ public Frame decorrelateRel(LogicalAggregate rel) throws SemanticException {
+ if (rel.getGroupType() != Aggregate.Group.SIMPLE) {
+ throw new AssertionError(Bug.CALCITE_461_FIXED);
+ }
+ //
+ // Rewrite logic:
+ //
+ // 1. Permute the group by keys to the front.
+ // 2. If the input of an aggregate produces correlated variables,
+ // add them to the group list.
+ // 3. Change aggCalls to reference the new project.
+ //
+
+ // Aggregate itself should not reference cor vars.
+ assert !cm.mapRefRelToCorVar.containsKey(rel);
+
+ final RelNode oldInput = rel.getInput();
+ final Frame frame = getInvoke(oldInput, rel);
+ if (frame == null) {
+ // If input has not been rewritten, do not rewrite this rel.
+ return null;
+ }
+
+ // This looks like a bug in Calcite: Aggregate seems to always expect a
+ // correlated variable in the nodes underneath it, which is not true for queries such as
+ // select p.empno, li.mgr from (select distinct empno as empno from emp) p join emp li on p.empno= li.empno where li.sal = 1
+ // and li.deptno in (select deptno from emp where JOB = 'AIR' AND li.mgr=mgr)
+
+ //assert !frame.corVarOutputPos.isEmpty();
+ final RelNode newInput = frame.r;
+
+ // map from newInput
+ Map<Integer, Integer> mapNewInputToProjOutputPos = Maps.newHashMap();
+ final int oldGroupKeyCount = rel.getGroupSet().cardinality();
+
+ // Project projects the original expressions,
+ // plus any correlated variables the input wants to pass along.
+ final List<Pair<RexNode, String>> projects = Lists.newArrayList();
+
+ List<RelDataTypeField> newInputOutput =
+ newInput.getRowType().getFieldList();
+
+ int newPos = 0;
+
+ // oldInput has the original group by keys in the front.
+ final NavigableMap<Integer, RexLiteral> omittedConstants = new TreeMap<>();
+ for (int i = 0; i < oldGroupKeyCount; i++) {
+ final RexLiteral constant = projectedLiteral(newInput, i);
+ if (constant != null) {
+ // Exclude constants. Aggregate({true}) occurs because Aggregate({})
+ // would generate 1 row even when applied to an empty table.
+ omittedConstants.put(i, constant);
+ continue;
+ }
+ int newInputPos = frame.oldToNewOutputPos.get(i);
+ projects.add(RexInputRef.of2(newInputPos, newInputOutput));
+ mapNewInputToProjOutputPos.put(newInputPos, newPos);
+ newPos++;
+ }
+
+ final SortedMap<Correlation, Integer> mapCorVarToOutputPos = new TreeMap<>();
+ if (!frame.corVarOutputPos.isEmpty()) {
+ // If input produces correlated variables, move them to the front,
+ // right after any existing GROUP BY fields.
+
+ // Now add the corVars from the input, starting from
+ // position oldGroupKeyCount.
+ for (Map.Entry<Correlation, Integer> entry
+ : frame.corVarOutputPos.entrySet()) {
+ projects.add(RexInputRef.of2(entry.getValue(), newInputOutput));
+
+ mapCorVarToOutputPos.put(entry.getKey(), newPos);
+ mapNewInputToProjOutputPos.put(entry.getValue(), newPos);
+ newPos++;
+ }
+ }
+
+ // add the remaining fields
+ final int newGroupKeyCount = newPos;
+ for (int i = 0; i < newInputOutput.size(); i++) {
+ if (!mapNewInputToProjOutputPos.containsKey(i)) {
+ projects.add(RexInputRef.of2(i, newInputOutput));
+ mapNewInputToProjOutputPos.put(i, newPos);
+ newPos++;
+ }
+ }
+
+ assert newPos == newInputOutput.size();
+
+ // This Project will be what the old input maps to,
+ // replacing any previous mapping from the old input.
+
+ RelNode newProject = HiveProject.create(newInput, Pair.left(projects), Pair.right(projects));
+
+ // update mappings:
+ // oldInput ----> newInput
+ //
+ // newProject
+ // |
+ // oldInput ----> newInput
+ //
+ // is transformed to
+ //
+ // oldInput ----> newProject
+ // |
+ // newInput
+ Map<Integer, Integer> combinedMap = Maps.newHashMap();
+
+ for (Integer oldInputPos : frame.oldToNewOutputPos.keySet()) {
+ combinedMap.put(oldInputPos,
+ mapNewInputToProjOutputPos.get(
+ frame.oldToNewOutputPos.get(oldInputPos)));
+ }
+
+ register(oldInput, newProject, combinedMap, mapCorVarToOutputPos);
+
+ // now it's time to rewrite the Aggregate
+ final ImmutableBitSet newGroupSet = ImmutableBitSet.range(newGroupKeyCount);
+ List<AggregateCall> newAggCalls = Lists.newArrayList();
+ List<AggregateCall> oldAggCalls = rel.getAggCallList();
+
+ int oldInputOutputFieldCount = rel.getGroupSet().cardinality();
+ int newInputOutputFieldCount = newGroupSet.cardinality();
+
+ int i = -1;
+ for (AggregateCall oldAggCall : oldAggCalls) {
+ ++i;
+ List<Integer> oldAggArgs = oldAggCall.getArgList();
+
+ List<Integer> aggArgs = Lists.newArrayList();
+
+ // Adjust the aggregator argument positions.
+ // Note aggregator does not change input ordering, so the input
+ // output position mapping can be used to derive the new positions
+ // for the argument.
+ for (int oldPos : oldAggArgs) {
+ aggArgs.add(combinedMap.get(oldPos));
+ }
+ final int filterArg = oldAggCall.filterArg < 0 ? oldAggCall.filterArg
+ : combinedMap.get(oldAggCall.filterArg);
+
+ newAggCalls.add(
+ oldAggCall.adaptTo(newProject, aggArgs, filterArg,
+ oldGroupKeyCount, newGroupKeyCount));
+
+ // The old to new output position mapping will be the same as that
+ // of newProject, plus any aggregates that the oldAgg produces.
+ combinedMap.put(
+ oldInputOutputFieldCount + i,
+ newInputOutputFieldCount + i);
+ }
+
+ relBuilder.push(
+ LogicalAggregate.create(newProject,
+ false,
+ newGroupSet,
+ null,
+ newAggCalls));
+
+ if (!omittedConstants.isEmpty()) {
+ final List<RexNode> postProjects = new ArrayList<>(relBuilder.fields());
+ for (Map.Entry<Integer, RexLiteral> entry
+ : omittedConstants.descendingMap().entrySet()) {
+ postProjects.add(entry.getKey() + frame.corVarOutputPos.size(),
+ entry.getValue());
+ }
+ relBuilder.project(postProjects);
+ }
+
+ // Aggregate does not change input ordering so corVars will be
+ // located at the same position as the input newProject.
+ return register(rel, relBuilder.build(), combinedMap, mapCorVarToOutputPos);
+ }
+
+ public Frame getInvoke(RelNode r, RelNode parent) {
+ final Frame frame = dispatcher.invoke(r);
+ if (frame != null) {
+ map.put(r, frame);
+ }
+ currentRel = parent;
+ return frame;
+ }
+
+ /** Returns a literal output field, or null if it is not literal. */
+ private static RexLiteral projectedLiteral(RelNode rel, int i) {
+ if (rel instanceof Project) {
+ final Project project = (Project) rel;
+ final RexNode node = project.getProjects().get(i);
+ if (node instanceof RexLiteral) {
+ return (RexLiteral) node;
+ }
+ }
+ return null;
+ }
+
+ public Frame decorrelateRel(HiveAggregate rel) throws SemanticException {
+ {
+ if (rel.getGroupType() != Aggregate.Group.SIMPLE) {
+ throw new AssertionError(Bug.CALCITE_461_FIXED);
+ }
+ //
+ // Rewrite logic:
+ //
+ // 1. Permute the group by keys to the front.
+ // 2. If the input of an aggregate produces correlated variables,
+ // add them to the group list.
+ // 3. Change aggCalls to reference the new project.
+ //
+
+ // Aggregate itself should not reference cor vars.
+ assert !cm.mapRefRelToCorVar.containsKey(rel);
+
+ final RelNode oldInput = rel.getInput();
+ final Frame frame = getInvoke(oldInput, rel);
+ if (frame == null) {
+ // If input has not been rewritten, do not rewrite this rel.
+ return null;
+ }
+ //assert !frame.corVarOutputPos.isEmpty();
+ final RelNode newInput = frame.r;
+
+ // map from newInput
+ Map<Integer, Integer> mapNewInputToProjOutputPos = Maps.newHashMap();
+ final int oldGroupKeyCount = rel.getGroupSet().cardinality();
+
+ // Project projects the original expressions,
+ // plus any correlated variables the input wants to pass along.
+ final List<Pair<RexNode, String>> projects = Lists.newArrayList();
+
+ List<RelDataTypeField> newInputOutput =
+ newInput.getRowType().getFieldList();
+
+ int newPos = 0;
+
+ // oldInput has the original group by keys in the front.
+ final NavigableMap<Integer, RexLiteral> omittedConstants = new TreeMap<>();
+ for (int i = 0; i < oldGroupKeyCount; i++) {
+ final RexLiteral constant = projectedLiteral(newInput, i);
+ if (constant != null) {
+ // Exclude constants. Aggregate({true}) occurs because Aggregate({})
+ // would generate 1 row even when applied to an empty table.
+ omittedConstants.put(i, constant);
+ continue;
+ }
+ int newInputPos = frame.oldToNewOutputPos.get(i);
+ projects.add(RexInputRef.of2(newInputPos, newInputOutput));
+ mapNewInputToProjOutputPos.put(newInputPos, newPos);
+ newPos++;
+ }
+
+ final SortedMap<Correlation, Integer> mapCorVarToOutputPos = new TreeMap<>();
+ if (!frame.corVarOutputPos.isEmpty()) {
+ // If input produces correlated variables, move them to the front,
+ // right after any existing GROUP BY fields.
+
+ // Now add the corVars from the input, starting from
+ // position oldGroupKeyCount.
+ for (Map.Entry<Correlation, Integer> entry
+ : frame.corVarOutputPos.entrySet()) {
+ projects.add(RexInputRef.of2(entry.getValue(), newInputOutput));
+
+ mapCorVarToOutputPos.put(entry.getKey(), newPos);
+ mapNewInputToProjOutputPos.put(entry.getValue(), newPos);
+ newPos++;
+ }
+ }
+
+ // add the remaining fields
+ final int newGroupKeyCount = newPos;
+ for (int i = 0; i < newInputOutput.size(); i++) {
+ if (!mapNewInputToProjOutputPos.containsKey(i)) {
+ projects.add(RexInputRef.of2(i, newInputOutput));
+ mapNewInputToProjOutputPos.put(i, newPos);
+ newPos++;
+ }
+ }
+
+ assert newPos == newInputOutput.size();
+
+ // This Project will be what the old input maps to,
+ // replacing any previous mapping from the old input.
+ RelNode newProject = HiveProject.create(newInput, Pair.left(projects), Pair.right(projects));
+
+ // update mappings:
+ // oldInput ----> newInput
+ //
+ // newProject
+ // |
+ // oldInput ----> newInput
+ //
+ // is transformed to
+ //
+ // oldInput ----> newProject
+ // |
+ // newInput
+ Map<Integer, Integer> combinedMap = Maps.newHashMap();
+
+ for (Integer oldInputPos : frame.oldToNewOutputPos.keySet()) {
+ combinedMap.put(oldInputPos,
+ mapNewInputToProjOutputPos.get(
+ frame.oldToNewOutputPos.get(oldInputPos)));
+ }
+
+ register(oldInput, newProject, combinedMap, mapCorVarToOutputPos);
+
+ // now it's time to rewrite the Aggregate
+ final ImmutableBitSet newGroupSet = ImmutableBitSet.range(newGroupKeyCount);
+ List<AggregateCall> newAggCalls = Lists.newArrayList();
+ List<AggregateCall> oldAggCalls = rel.getAggCallList();
+
+ int oldInputOutputFieldCount = rel.getGroupSet().cardinality();
+ int newInputOutputFieldCount = newGroupSet.cardinality();
+
+ int i = -1;
+ for (AggregateCall oldAggCall : oldAggCalls) {
+ ++i;
+ List<Integer> oldAggArgs = oldAggCall.getArgList();
+
+ List<Integer> aggArgs = Lists.newArrayList();
+
+ // Adjust the aggregator argument positions.
+ // Note aggregator does not change input ordering, so the input
+ // output position mapping can be used to derive the new positions
+ // for the argument.
+ for (int oldPos : oldAggArgs) {
+ aggArgs.add(combinedMap.get(oldPos));
+ }
+ final int filterArg = oldAggCall.filterArg < 0 ? oldAggCall.filterArg
+ : combinedMap.get(oldAggCall.filterArg);
+
+ newAggCalls.add(
+ oldAggCall.adaptTo(newProject, aggArgs, filterArg,
+ oldGroupKeyCount, newGroupKeyCount));
+
+ // The old to new output position mapping will be the same as that
+ // of newProject, plus any aggregates that the oldAgg produces.
+ combinedMap.put(
+ oldInputOutputFieldCount + i,
+ newInputOutputFieldCount + i);
+ }
+
+ relBuilder.push(
+ new HiveAggregate(rel.getCluster(), rel.getTraitSet(), newProject, false, newGroupSet, null, newAggCalls));
+
+ if (!omittedConstants.isEmpty()) {
+ final List<RexNode> postProjects = new ArrayList<>(relBuilder.fields());
+ for (Map.Entry<Integer, RexLiteral> entry
+ : omittedConstants.descendingMap().entrySet()) {
+ postProjects.add(entry.getKey() + frame.corVarOutputPos.size(),
+ entry.getValue());
+ }
+ relBuilder.project(postProjects);
+ }
+
+ // Aggregate does not change input ordering so corVars will be
+ // located at the same position as the input newProject.
+ return register(rel, relBuilder.build(), combinedMap, mapCorVarToOutputPos);
+ }
+ }
+
+ public Frame decorrelateRel(HiveProject rel) throws SemanticException {
+ {
+ //
+ // Rewrite logic:
+ //
+ // 1. Pass along any correlated variables coming from the input.
+ //
+
+ final RelNode oldInput = rel.getInput();
+ Frame frame = getInvoke(oldInput, rel);
+ if (frame == null) {
+ // If input has not been rewritten, do not rewrite this rel.
+ return null;
+ }
+ final List<RexNode> oldProjects = rel.getProjects();
+ final List<RelDataTypeField> relOutput = rel.getRowType().getFieldList();
+
+ // LogicalProject projects the original expressions,
+ // plus any correlated variables the input wants to pass along.
+ final List<Pair<RexNode, String>> projects = Lists.newArrayList();
+
+ // If this HiveProject has a correlated reference, create a value generator
+ // and produce the correlated variables in the new output.
+ if (cm.mapRefRelToCorVar.containsKey(rel)) {
+ decorrelateInputWithValueGenerator(rel);
+
+ // The old input should be mapped to the LogicalJoin created by
+ // decorrelateInputWithValueGenerator().
+ frame = map.get(oldInput);
+ }
+
+ // LogicalProject projects the original expressions
+ final Map<Integer, Integer> mapOldToNewOutputPos = Maps.newHashMap();
+ int newPos;
+ for (newPos = 0; newPos < oldProjects.size(); newPos++) {
+ projects.add(
+ newPos,
+ Pair.of(
+ decorrelateExpr(oldProjects.get(newPos)),
+ relOutput.get(newPos).getName()));
+ mapOldToNewOutputPos.put(newPos, newPos);
+ }
+
+ // Project any correlated variables the input wants to pass along.
+ // There could be a situation where multiple correlated variables refer to the
+ // same outer variable, in which case the Project would end up with multiple
+ // fields having the same name. Hive doesn't allow a HiveProject with duplicate
+ // field names, so we keep a set of all field names and generate a new
+ // field/column name whenever an existing one is encountered.
+ final Set<String> corrFieldName = Sets.newHashSet();
+ int pos = 0;
+
+ final SortedMap<Correlation, Integer> mapCorVarToOutputPos = new TreeMap<>();
+ for (Map.Entry<Correlation, Integer> entry : frame.corVarOutputPos.entrySet()) {
+ final RelDataTypeField field = frame.r.getRowType().getFieldList().get(entry.getValue());
+ RexNode projectChild = new RexInputRef(entry.getValue(), field.getType());
+ String fieldName = field.getName();
+ if (corrFieldName.contains(fieldName)) {
+ fieldName = SemanticAnalyzer.getColumnInternalName(pos++);
+ }
+
+ projects.add(Pair.of(projectChild, fieldName));
+ corrFieldName.add(fieldName);
+ mapCorVarToOutputPos.put(entry.getKey(), newPos);
+ newPos++;
+ }
+
+ RelNode newProject = HiveProject.create(frame.r, Pair.left(projects), Pair.right(projects));
+
+ return register(rel, newProject, mapOldToNewOutputPos,
+ mapCorVarToOutputPos);
+ }
+ }
+
+ /**
+ * Rewrite LogicalProject.
+ *
+ * @param rel the project rel to rewrite
+ */
+ public Frame decorrelateRel(LogicalProject rel) throws SemanticException {
+ //
+ // Rewrite logic:
+ //
+ // 1. Pass along any correlated variables coming from the input.
+ //
+
+ final RelNode oldInput = rel.getInput();
+ Frame frame = getInvoke(oldInput, rel);
+ if (frame == null) {
+ // If input has not been rewritten, do not rewrite this rel.
+ return null;
+ }
+ final List<RexNode> oldProjects = rel.getProjects();
+ final List<RelDataTypeField> relOutput = rel.getRowType().getFieldList();
+
+ // LogicalProject projects the original expressions,
+ // plus any correlated variables the input wants to pass along.
+ final List<Pair<RexNode, String>> projects = Lists.newArrayList();
+
+ // If this LogicalProject has a correlated reference, create a value generator
+ // and produce the correlated variables in the new output.
+ if (cm.mapRefRelToCorVar.containsKey(rel)) {
+ decorrelateInputWithValueGenerator(rel);
+
+ // The old input should be mapped to the LogicalJoin created by
+ // decorrelateInputWithValueGenerator().
+ frame = map.get(oldInput);
+ }
+
+ // LogicalProject projects the original expressions
+ final Map<Integer, Integer> mapOldToNewOutputPos = Maps.newHashMap();
+ int newPos;
+ for (newPos = 0; newPos < oldProjects.size(); newPos++) {
+ projects.add(
+ newPos,
+ Pair.of(
+ decorrelateExpr(oldProjects.get(newPos)),
+ relOutput.get(newPos).getName()));
+ mapOldToNewOutputPos.put(newPos, newPos);
+ }
+
+ // Project any correlated variables the input wants to pass along.
+ final SortedMap<Correlation, Integer> mapCorVarToOutputPos = new TreeMap<>();
+ for (Map.Entry<Correlation, Integer> entry : frame.corVarOutputPos.entrySet()) {
+ projects.add(
+ RexInputRef.of2(entry.getValue(),
+ frame.r.getRowType().getFieldList()));
+ mapCorVarToOutputPos.put(entry.getKey(), newPos);
+ newPos++;
+ }
+
+ RelNode newProject = HiveProject.create(frame.r, Pair.left(projects), Pair.right(projects));
+
+ return register(rel, newProject, mapOldToNewOutputPos,
+ mapCorVarToOutputPos);
+ }
+
+ /**
+ * Create RelNode tree that produces a list of correlated variables.
+ *
+ * @param correlations correlated variables to generate
+ * @param valueGenFieldOffset offset in the output that generated columns
+ * will start
+ * @param mapCorVarToOutputPos output positions for the correlated variables
+ * generated
+ * @return RelNode the root of the resultant RelNode tree
+ */
+ private RelNode createValueGenerator(
+ Iterable<Correlation> correlations,
+ int valueGenFieldOffset,
+ SortedMap<Correlation, Integer> mapCorVarToOutputPos) {
+ final Map<RelNode, List<Integer>> mapNewInputToOutputPos =
+ new HashMap<>();
+
+ final Map<RelNode, Integer> mapNewInputToNewOffset = new HashMap<>();
+
+ // Input provides the definition of a correlated variable.
+ // Add to map all the referenced positions (relative to each input rel).
+ for (Correlation corVar : correlations) {
+ final int oldCorVarOffset = corVar.field;
+
+ final RelNode oldInput = getCorRel(corVar);
+ assert oldInput != null;
+ final Frame frame = map.get(oldInput);
+ assert frame != null;
+ final RelNode newInput = frame.r;
+
+ final List<Integer> newLocalOutputPosList;
+ if (!mapNewInputToOutputPos.containsKey(newInput)) {
+ newLocalOutputPosList = Lists.newArrayList();
+ } else {
+ newLocalOutputPosList =
+ mapNewInputToOutputPos.get(newInput);
+ }
+
+ final int newCorVarOffset = frame.oldToNewOutputPos.get(oldCorVarOffset);
+
+ // Add all unique positions referenced.
+ if (!newLocalOutputPosList.contains(newCorVarOffset)) {
+ newLocalOutputPosList.add(newCorVarOffset);
+ }
+ mapNewInputToOutputPos.put(newInput, newLocalOutputPosList);
+ }
+
+ int offset = 0;
+
+ // Project only the correlated fields out of each inputRel
+ // and join the projectRel together.
+ // To make sure the plan does not change in terms of join order,
+ // join these rels based on their occurrence in the cor var list, which
+ // is sorted.
+ final Set<RelNode> joinedInputRelSet = Sets.newHashSet();
+
+ RelNode r = null;
+ for (Correlation corVar : correlations) {
+ final RelNode oldInput = getCorRel(corVar);
+ assert oldInput != null;
+ final RelNode newInput = map.get(oldInput).r;
+ assert newInput != null;
+
+ if (!joinedInputRelSet.contains(newInput)) {
+ RelNode project =
+ RelOptUtil.createProject(
+ newInput,
+ mapNewInputToOutputPos.get(newInput));
+ RelNode distinct = RelOptUtil.createDistinctRel(project);
+ RelOptCluster cluster = distinct.getCluster();
+
+ joinedInputRelSet.add(newInput);
+ mapNewInputToNewOffset.put(newInput, offset);
+ offset += distinct.getRowType().getFieldCount();
+
+ if (r == null) {
+ r = distinct;
+ } else {
+ r =
+ LogicalJoin.create(r, distinct,
+ cluster.getRexBuilder().makeLiteral(true),
+ ImmutableSet.<CorrelationId>of(), JoinRelType.INNER);
+ }
+ }
+ }
+
+ // Translate the positions of correlated variables to be relative to
+ // the join output, leaving room for valueGenFieldOffset because
+ // valueGenerators are joined with the original left input of the rel
+ // referencing correlated variables.
+ for (Correlation corVar : correlations) {
+ // The first input of a Correlator is always the rel defining
+ // the correlated variables.
+ final RelNode oldInput = getCorRel(corVar);
+ assert oldInput != null;
+ final Frame frame = map.get(oldInput);
+ final RelNode newInput = frame.r;
+ assert newInput != null;
+
+ final List<Integer> newLocalOutputPosList =
+ mapNewInputToOutputPos.get(newInput);
+
+ final int newLocalOutputPos = frame.oldToNewOutputPos.get(corVar.field);
+
+ // newOutputPos is the index of the cor var in the referenced
+ // position list plus the offset of referenced position list of
+ // each newInput.
+ final int newOutputPos =
+ newLocalOutputPosList.indexOf(newLocalOutputPos)
+ + mapNewInputToNewOffset.get(newInput)
+ + valueGenFieldOffset;
+
+ if (mapCorVarToOutputPos.containsKey(corVar)) {
+ assert mapCorVarToOutputPos.get(corVar) == newOutputPos;
+ }
+ mapCorVarToOutputPos.put(corVar, newOutputPos);
+ }
+
+ return r;
+ }
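The position bookkeeping in createValueGenerator above can be modeled in isolation. The following is a minimal Python sketch, not Hive/Calcite code: it collects the unique referenced output positions per input (step 1 of the method), assigns each distinct input a running offset equal to the width of the inputs joined before it, and then computes each correlated variable's final position as index-in-local-list + input offset + valueGenFieldOffset, mirroring the newOutputPos formula.

```python
# Illustrative model (not Hive/Calcite code) of createValueGenerator's
# position bookkeeping.

def value_generator_positions(cor_vars, value_gen_field_offset):
    """cor_vars: sorted list of (input_id, new_cor_var_offset) pairs."""
    # Step 1: unique positions referenced per input, in first-seen order.
    pos_lists = {}
    for inp, off in cor_vars:
        lst = pos_lists.setdefault(inp, [])
        if off not in lst:
            lst.append(off)
    # Step 2: each distinct input starts at a running offset equal to the
    # total (distinct-projected) field count of the inputs joined before it.
    input_offset, running = {}, 0
    for inp, _ in cor_vars:
        if inp not in input_offset:
            input_offset[inp] = running
            running += len(pos_lists[inp])
    # Step 3: final position = index in the per-input position list
    #         + that input's offset + valueGenFieldOffset.
    return {
        (inp, off): pos_lists[inp].index(off)
                    + input_offset[inp]
                    + value_gen_field_offset
        for inp, off in cor_vars
    }
```

With two inputs `"a"` (referencing positions 3 and 5) and `"b"` (position 0) and a valueGenFieldOffset of 10, the model places them at 10, 11, and 12 respectively, matching the concatenated distinct-project layout.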
+
+
+ // Returns the source of corVar, i.e. the Rel that produces the cor var's
+ // value. This is always the left input of the LogicalCorrelate, i.e. the
+ // outer query.
+ private RelNode getCorRel(Correlation corVar) {
+ final RelNode r = cm.mapCorVarToCorRel.get(corVar.corr);
+ return r.getInput(0);
+ }
+
+ private void decorrelateInputWithValueGenerator(RelNode rel) {
+ // currently only handles one input
+ assert rel.getInputs().size() == 1;
+ RelNode oldInput = rel.getInput(0);
+ final Frame frame = map.get(oldInput);
+
+ final SortedMap<Correlation, Integer> mapCorVarToOutputPos =
+ new TreeMap<>(frame.corVarOutputPos);
+
+ final Collection<Correlation> corVarList = cm.mapRefRelToCorVar.get(rel);
+
+ int leftInputOutputCount = frame.r.getRowType().getFieldCount();
+
+ // can directly add positions into mapCorVarToOutputPos since join
+ // does not change the output ordering from the inputs.
+ RelNode valueGen =
+ createValueGenerator(
+ corVarList,
+ leftInputOutputCount,
+ mapCorVarToOutputPos);
+
+ RelNode join =
+ LogicalJoin.create(frame.r, valueGen, rexBuilder.makeLiteral(true),
+ ImmutableSet.<CorrelationId>of(), JoinRelType.INNER);
+
+ // LogicalJoin or LogicalFilter does not change the old input ordering. All
+ // input fields from newLeftInput (i.e. the original input to the old
+ // LogicalFilter) are in the output and in the same position.
+ register(oldInput, join, frame.oldToNewOutputPos, mapCorVarToOutputPos);
+ }
+
+ public Frame decorrelateRel(HiveFilter rel) throws SemanticException {
+ {
+ //
+ // Rewrite logic:
+ //
+ // 1. If a LogicalFilter references a correlated field in its filter
+ // condition, rewrite the LogicalFilter to be
+ // LogicalFilter
+ // LogicalJoin(cross product)
+ // OriginalFilterInput
+ // ValueGenerator(produces distinct sets of correlated variables)
+ // and rewrite the correlated fieldAccess in the filter condition to
+ // reference the LogicalJoin output.
+ //
+ // 2. If LogicalFilter does not reference correlated variables, simply
+ // rewrite the filter condition using new input.
+ //
+
+ final RelNode oldInput = rel.getInput();
+ Frame frame = getInvoke(oldInput, rel);
+ if (frame == null) {
+ // If input has not been rewritten, do not rewrite this rel.
+ return null;
+ }
+
+ // If this LogicalFilter has correlated reference, create value generator
+ // and produce the correlated variables in the new output.
+ if (cm.mapRefRelToCorVar.containsKey(rel)) {
+ decorrelateInputWithValueGenerator(rel);
+
+ // The old input should be mapped to the newly created LogicalJoin by
+ // rewriteInputWithValueGenerator().
+ frame = map.get(oldInput);
+ }
+
+ // Replace the filter expression to reference output of the join
+ // Map filter to the new filter over join
+ RelNode newFilter = new HiveFilter(rel.getCluster(), rel.getTraitSet(), frame.r,
+ decorrelateExpr(rel.getCondition()));
+
+ // Filter does not change the input ordering.
+ // Filter rel does not permute the input.
+ // All corvars produced by filter will have the same output positions in the
+ // input rel.
+ return register(rel, newFilter, frame.oldToNewOutputPos,
+ frame.corVarOutputPos);
+ }
+ }
+
+ /**
+ * Rewrite LogicalFilter.
+ *
+ * @param rel the filter rel to rewrite
+ */
+ public Frame decorrelateRel(LogicalFilter rel) {
+ //
+ // Rewrite logic:
+ //
+ // 1. If a LogicalFilter references a correlated field in its filter
+ // condition, rewrite the LogicalFilter to be
+ // LogicalFilter
+ // LogicalJoin(cross product)
+ // OriginalFilterInput
+ // ValueGenerator(produces distinct sets of correlated variables)
+ // and rewrite the correlated fieldAccess in the filter condition to
+ // reference the LogicalJoin output.
+ //
+ // 2. If LogicalFilter does not reference correlated variables, simply
+ // rewrite the filter condition using new input.
+ //
+
+ final RelNode oldInput = rel.getInput();
+ Frame frame = getInvoke(oldInput, rel);
+ if (frame == null) {
+ // If input has not been rewritten, do not rewrite this rel.
+ return null;
+ }
+
+ // If this LogicalFilter has correlated reference, create value generator
+ // and produce the correlated variables in the new output.
+ if (cm.mapRefRelToCorVar.containsKey(rel)) {
+ decorrelateInputWithValueGenerator(rel);
+
+ // The old input should be mapped to the newly created LogicalJoin by
+ // rewriteInputWithValueGenerator().
+ frame = map.get(oldInput);
+ }
+
+ // Replace the filter expression to reference output of the join
+ // Map filter to the new filter over join
+ RelNode newFilter = new HiveFilter(rel.getCluster(), rel.getTraitSet(), frame.r,
+ decorrelateExpr(rel.getCondition()));
+
+
+ // Filter does not change the input ordering.
+ // Filter rel does not permute the input.
+ // All corvars produced by filter will have the same output positions in the
+ // input rel.
+ return register(rel, newFilter, frame.oldToNewOutputPos,
+ frame.corVarOutputPos);
+ }
+
+ /**
+ * Rewrite Correlator into a left outer join.
+ *
+ * @param rel Correlator
+ */
+ public Frame decorrelateRel(LogicalCorrelate rel) {
+ //
+ // Rewrite logic:
+ //
+ // The original left input will be joined with the new right input that
+ // has generated correlated variables propagated up. For any generated
+ // cor vars that are not used in the join key, pass them along to be
+ // joined later with the CorrelatorRels that produce them.
+ //
+
+ // the right input to Correlator should produce correlated variables
+ final RelNode oldLeft = rel.getInput(0);
+ final RelNode oldRight = rel.getInput(1);
+
+ final Frame leftFrame = getInvoke(oldLeft, rel);
+ final Frame rightFrame = getInvoke(oldRight, rel);
+
+ if (leftFrame == null || rightFrame == null) {
+ // If any input has not been rewritten, do not rewrite this rel.
+ return null;
+ }
+
+ if (rightFrame.corVarOutputPos.isEmpty()) {
+ return null;
+ }
+
+ assert rel.getRequiredColumns().cardinality()
+ <= rightFrame.corVarOutputPos.keySet().size();
+
+ // Change correlator rel into a join.
+ // Join all the correlated variables produced by this correlator rel
+ // with the values generated and propagated from the right input
+ final SortedMap<Correlation, Integer> corVarOutputPos =
+ new TreeMap<>(rightFrame.corVarOutputPos);
+ final List<RexNode> conditions = new ArrayList<>();
+ final List<RelDataTypeField> newLeftOutput =
+ leftFrame.r.getRowType().getFieldList();
+ int newLeftFieldCount = newLeftOutput.size();
+
+ final List<RelDataTypeField> newRightOutput =
+ rightFrame.r.getRowType().getFieldList();
+
+ for (Map.Entry<Correlation, Integer> rightOutputPos
+ : Lists.newArrayList(corVarOutputPos.entrySet())) {
+ final Correlation corVar = rightOutputPos.getKey();
+ if (!corVar.corr.equals(rel.getCorrelationId())) {
+ continue;
+ }
+ final int newLeftPos = leftFrame.oldToNewOutputPos.get(corVar.field);
+ final int newRightPos = rightOutputPos.getValue();
+ conditions.add(
+ rexBuilder.makeCall(SqlStdOperatorTable.EQUALS,
+ RexInputRef.of(newLeftPos, newLeftOutput),
+ new RexInputRef(newLeftFieldCount + newRightPos,
+ newRightOutput.get(newRightPos).getType())));
+
+ // remove this cor var from output position mapping
+ corVarOutputPos.remove(corVar);
+ }
+
+ // Update the output position for the cor vars: only pass on the cor
+ // vars that are not used in the join key.
+ for (Correlation corVar : corVarOutputPos.keySet()) {
+ int newPos = corVarOutputPos.get(corVar) + newLeftFieldCount;
+ corVarOutputPos.put(corVar, newPos);
+ }
+
+ // then add any cor var from the left input. Do not need to change
+ // output positions.
+ corVarOutputPos.putAll(leftFrame.corVarOutputPos);
+
+ // Create the mapping between the output of the old correlation rel
+ // and the new join rel
+ final Map<Integer, Integer> mapOldToNewOutputPos = Maps.newHashMap();
+
+ int oldLeftFieldCount = oldLeft.getRowType().getFieldCount();
+
+ int oldRightFieldCount = oldRight.getRowType().getFieldCount();
+ assert rel.getRowType().getFieldCount()
+ == oldLeftFieldCount + oldRightFieldCount;
+
+ // Left input positions are not changed.
+ mapOldToNewOutputPos.putAll(leftFrame.oldToNewOutputPos);
+
+ // Right input positions are shifted by newLeftFieldCount.
+ for (int i = 0; i < oldRightFieldCount; i++) {
+ mapOldToNewOutputPos.put(
+ i + oldLeftFieldCount,
+ rightFrame.oldToNewOutputPos.get(i) + newLeftFieldCount);
+ }
+
+ final RexNode condition =
+ RexUtil.composeConjunction(rexBuilder, conditions, false);
+ RelNode newJoin =
+ LogicalJoin.create(leftFrame.r, rightFrame.r, condition,
+ ImmutableSet.<CorrelationId>of(), rel.getJoinType().toJoinType());
+
+ return register(rel, newJoin, mapOldToNewOutputPos, corVarOutputPos);
+ }
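The old-to-new output mapping built when a Correlate (or Join) is rewritten follows a simple rule: old positions below the old left field count pass through the left frame's mapping unchanged, while old right positions pass through the right frame's mapping and are then shifted by the new left field count. A minimal Python sketch of that rule (not Hive/Calcite code; the map arguments are plain dicts standing in for `oldToNewOutputPos`):

```python
# Illustrative model (not Hive/Calcite code) of the output-position mapping
# built for a rewritten Correlate/Join.

def join_output_mapping(left_map, right_map, old_left_count, new_left_count):
    # Left input positions are not changed.
    mapping = dict(left_map)
    # Right input positions are shifted by the new left field count.
    for old_pos, new_pos in right_map.items():
        mapping[old_pos + old_left_count] = new_pos + new_left_count
    return mapping
```

For example, with an old left width of 2 and a new left width of 3, old right position 0 (global position 2) lands at its new local position plus 3.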
+
+ public Frame decorrelateRel(HiveJoin rel) throws SemanticException {
+ //
+ // Rewrite logic:
+ //
+ // 1. rewrite join condition.
+ // 2. map output positions and produce cor vars if any.
+ //
+
+ final RelNode oldLeft = rel.getInput(0);
+ final RelNode oldRight = rel.getInput(1);
+
+ final Frame leftFrame = getInvoke(oldLeft, rel);
+ final Frame rightFrame = getInvoke(oldRight, rel);
+
+ if (leftFrame == null || rightFrame == null) {
+ // If any input has not been rewritten, do not rewrite this rel.
+ return null;
+ }
+
+ final RelNode newJoin = HiveJoin.getJoin(rel.getCluster(), leftFrame.r,
+ rightFrame.r, decorrelateExpr(rel.getCondition()), rel.getJoinType());
+
+ // Create the mapping between the output of the old correlation rel
+ // and the new join rel
+ Map<Integer, Integer> mapOldToNewOutputPos = Maps.newHashMap();
+
+ int oldLeftFieldCount = oldLeft.getRowType().getFieldCount();
+ int newLeftFieldCount = leftFrame.r.getRowType().getFieldCount();
+
+ int oldRightFieldCount = oldRight.getRowType().getFieldCount();
+ assert rel.getRowType().getFieldCount()
+ == oldLeftFieldCount + oldRightFieldCount;
+
+ // Left input positions are not changed.
+ mapOldToNewOutputPos.putAll(leftFrame.oldToNewOutputPos);
+
+ // Right input positions are shifted by newLeftFieldCount.
+ for (int i = 0; i < oldRightFieldCount; i++) {
+ mapOldToNewOutputPos.put(i + oldLeftFieldCount,
+ rightFrame.oldToNewOutputPos.get(i) + newLeftFieldCount);
+ }
+
+ final SortedMap<Correlation, Integer> mapCorVarToOutputPos =
+ new TreeMap<>(leftFrame.corVarOutputPos);
+
+ // Right input positions are shifted by newLeftFieldCount.
+ for (Map.Entry<Correlation, Integer> entry
+ : rightFrame.corVarOutputPos.entrySet()) {
+ mapCorVarToOutputPos.put(entry.getKey(),
+ entry.getValue() + newLeftFieldCount);
+ }
+ return register(rel, newJoin, mapOldToNewOutputPos, mapCorVarToOutputPos);
+ }
+
+ /**
+ * Rewrite LogicalJoin.
+ *
+ * @param rel LogicalJoin
+ */
+ public Frame decorrelateRel(LogicalJoin rel) {
+ //
+ // Rewrite logic:
+ //
+ // 1. rewrite join condition.
+ // 2. map output positions and produce cor vars if any.
+ //
+
+ final RelNode oldLeft = rel.getInput(0);
+ final RelNode oldRight = rel.getInput(1);
+
+ final Frame leftFrame = getInvoke(oldLeft, rel);
+ final Frame rightFrame = getInvoke(oldRight, rel);
+
+ if (leftFrame == null || rightFrame == null) {
+ // If any input has not been rewritten, do not rewrite this rel.
+ return null;
+ }
+
+ final RelNode newJoin = HiveJoin.getJoin(rel.getCluster(), leftFrame.r,
+ rightFrame.r, decorrelateExpr(rel.getCondition()), rel.getJoinType());
+
+ // Create the mapping between the output of the old correlation rel
+ // and the new join rel
+ Map<Integer, Integer> mapOldToNewOutputPos = Maps.newHashMap();
+
+ int oldLeftFieldCount = oldLeft.getRowType().getFieldCount();
+ int newLeftFieldCount = leftFrame.r.getRowType().getFieldCount();
+
+ int oldRightFieldCount = oldRight.getRowType().getFieldCount();
+ assert rel.getRowType().getFieldCount()
+ == oldLeftFieldCount + oldRightFieldCount;
+
+ // Left input positions are not changed.
+ mapOldToNewOutputPos.putAll(leftFrame.oldToNewOutputPos);
+
+ // Right input positions are shifted by newLeftFieldCount.
+ for (int i = 0; i < oldRightFieldCount; i++) {
+ mapOldToNewOutputPos.put(i + oldLeftFieldCount,
+ rightFrame.oldToNewOutputPos.get(i) + newLeftFieldCount);
+ }
+
+ final SortedMap<Correlation, Integer> mapCorVarToOutputPos =
+ new TreeMap<>(leftFrame.corVarOutputPos);
+
+ // Right input positions are shifted by newLeftFieldCount.
+ for (Map.Entry<Correlation, Integer> entry
+ : rightFrame.corVarOutputPos.entrySet()) {
+ mapCorVarToOutputPos.put(entry.getKey(),
+ entry.getValue() + newLeftFieldCount);
+ }
+ return register(rel, newJoin, mapOldToNewOutputPos, mapCorVarToOutputPos);
+ }
+
+ private RexInputRef getNewForOldInputRef(RexInputRef oldInputRef) {
+ assert currentRel != null;
+
+ int oldOrdinal = oldInputRef.getIndex();
+ int newOrdinal = 0;
+
+ // determine which input rel oldOrdinal references, and adjust
+ // oldOrdinal to be relative to that input rel
+ RelNode oldInput = null;
+
+ for (RelNode oldInput0 : currentRel.getInputs()) {
+ RelDataType oldInputType = oldInput0.getRowType();
+ int n = oldInputType.getFieldCount();
+ if (oldOrdinal < n) {
+ oldInput = oldInput0;
+ break;
+ }
+ RelNode newInput = map.get(oldInput0).r;
+ newOrdinal += newInput.getRowType().getFieldCount();
+ oldOrdinal -= n;
+ }
+
+ assert oldInput != null;
+
+ final Frame frame = map.get(oldInput);
+ assert frame != null;
+
+ // now oldOrdinal is relative to oldInput
+ int oldLocalOrdinal = oldOrdinal;
+
+ // figure out the newLocalOrdinal, relative to the newInput.
+ int newLocalOrdinal = oldLocalOrdinal;
+
+ if (!frame.oldToNewOutputPos.isEmpty()) {
+ newLocalOrdinal = frame.oldToNewOutputPos.get(oldLocalOrdinal);
+ }
+
+ newOrdinal += newLocalOrdinal;
+
+ return new RexInputRef(newOrdinal,
+ frame.r.getRowType().getFieldList().get(newLocalOrdinal).getType());
+ }
+
+ /**
+ * Pulls project above the join from its RHS input. Enforces nullability
+ * for join output.
+ *
+ * @param join Join
+ * @param project Original project as the right-hand input of the join
+ * @param nullIndicatorPos Position of null indicator
+ * @return the subtree with the new LogicalProject at the root
+ */
+ private RelNode projectJoinOutputWithNullability(
+ LogicalJoin join,
+ LogicalProject project,
+ int nullIndicatorPos) {
+ final RelDataTypeFactory typeFactory = join.getCluster().getTypeFactory();
+ final RelNode left = join.getLeft();
+ final JoinRelType joinType = join.getJoinType();
+
+ RexInputRef nullIndicator =
+ new RexInputRef(
+ nullIndicatorPos,
+ typeFactory.createTypeWithNullability(
+ join.getRowType().getFieldList().get(nullIndicatorPos)
+ .getType(),
+ true));
+
+ // now create the new project
+ List<Pair<RexNode, String>> newProjExprs = Lists.newArrayList();
+
+ // project everything from the LHS and then those from the original
+ // projRel
+ List<RelDataTypeField> leftInputFields =
+ left.getRowType().getFieldList();
+
+ for (int i = 0; i < leftInputFields.size(); i++) {
+ newProjExprs.add(RexInputRef.of2(i, leftInputFields));
+ }
+
+ // Mark where the projected expr is coming from so that the types will
+ // become nullable for the original projections, which are now coming out
+ // of the nullable side of the OJ.
+ boolean projectPulledAboveLeftCorrelator =
+ joinType.generatesNullsOnRight();
+
+ for (Pair<RexNode, String> pair : project.getNamedProjects()) {
+ RexNode newProjExpr =
+ removeCorrelationExpr(
+ pair.left,
+ projectPulledAboveLeftCorrelator,
+ nullIndicator);
+
+ newProjExprs.add(Pair.of(newProjExpr, pair.right));
+ }
+
+ return RelOptUtil.createProject(join, newProjExprs, false);
+ }
+
+ /**
+ * Pulls a {@link Project} above a {@link Correlate} from its RHS input.
+ * Enforces nullability for join output.
+ *
+ * @param correlate Correlate
+ * @param project the original project as the RHS input of the join
+ * @param isCount Positions which are calls to the <code>COUNT</code>
+ * aggregation function
+ * @return the subtree with the new LogicalProject at the root
+ */
+ private RelNode aggregateCorrelatorOutput(
+ Correlate correlate,
+ LogicalProject project,
+ Set<Integer> isCount) {
+ final RelNode left = correlate.getLeft();
+ final JoinRelType joinType = correlate.getJoinType().toJoinType();
+
+ // now create the new project
+ final List<Pair<RexNode, String>> newProjects = Lists.newArrayList();
+
+ // Project everything from the LHS and then those from the original
+ // project
+ final List<RelDataTypeField> leftInputFields =
+ left.getRowType().getFieldList();
+
+ for (int i = 0; i < leftInputFields.size(); i++) {
+ newProjects.add(RexInputRef.of2(i, leftInputFields));
+ }
+
+ // Mark where the projected expr is coming from so that the types will
+ // become nullable for the original projections, which are now coming out
+ // of the nullable side of the OJ.
+ boolean projectPulledAboveLeftCorrelator =
+ joinType.generatesNullsOnRight();
+
+ for (Pair<RexNode, String> pair : project.getNamedProjects()) {
+ RexNode newProjExpr =
+ removeCorrelationExpr(
+ pair.left,
+ projectPulledAboveLeftCorrelator,
+ isCount);
+ newProjects.add(Pair.of(newProjExpr, pair.right));
+ }
+
+ return RelOptUtil.createProject(correlate, newProjects, false);
+ }
+
+ /**
+ * Checks whether the correlations in projRel and filter are related to
+ * the correlated variables provided by corRel.
+ *
+ * @param correlate Correlate
+ * @param project The original Project as the RHS input of the join
+ * @param filter Filter
+ * @param correlatedJoinKeys Correlated join keys
+ * @return true if filter and proj only references corVar provided by corRel
+ */
+ private boolean checkCorVars(
+ LogicalCorrelate correlate,
+ LogicalProject project,
+ LogicalFilter filter,
+ List<RexFieldAccess> correlatedJoinKeys) {
+ if (filter != null) {
+ assert correlatedJoinKeys != null;
+
+ // check that all correlated refs in the filter condition are
+ // used in the join (as field access).
+ Set<Correlation> corVarInFilter =
+ Sets.newHashSet(cm.mapRefRelToCorVar.get(filter));
+
+ for (RexFieldAccess correlatedJoinKey : correlatedJoinKeys) {
+ corVarInFilter.remove(cm.mapFieldAccessToCorVar.get(correlatedJoinKey));
+ }
+
+ if (!corVarInFilter.isEmpty()) {
+ return false;
+ }
+
+ // Check that the correlated variables referenced in these
+ // comparisons do come from the correlatorRel.
+ corVarInFilter.addAll(cm.mapRefRelToCorVar.get(filter));
+
+ for (Correlation corVar : corVarInFilter) {
+ if (cm.mapCorVarToCorRel.get(corVar.corr) != correlate) {
+ return false;
+ }
+ }
+ }
+
+ // if project has any correlated reference, make sure they are also
+ // provided by the current correlate. They will be projected out of the LHS
+ // of the correlate.
+ if ((project != null) && cm.mapRefRelToCorVar.containsKey(project)) {
+ for (Correlation corVar : cm.mapRefRelToCorVar.get(project)) {
+ if (cm.mapCorVarToCorRel.get(corVar.corr) != correlate) {
+ return false;
+ }
+ }
+ }
+
+ return true;
+ }
+
+ /**
+ * Remove correlated variables from the tree at root corRel
+ *
+ * @param correlate Correlator
+ */
+ private void removeCorVarFromTree(LogicalCorrelate correlate) {
+ if (cm.mapCorVarToCorRel.get(correlate.getCorrelationId()) == correlate) {
+ cm.mapCorVarToCorRel.remove(correlate.getCorrelationId());
+ }
+ }
+
+ /**
+ * Projects all {@code input} output fields plus the additional expressions.
+ *
+ * @param input Input relational expression
+ * @param additionalExprs Additional expressions and names
+ * @return the new LogicalProject
+ */
+ private RelNode createProjectWithAdditionalExprs(
+ RelNode input,
+ List<Pair<RexNode, String>> additionalExprs) {
+ final List<RelDataTypeField> fieldList =
+ input.getRowType().getFieldList();
+ List<Pair<RexNode, String>> projects = Lists.newArrayList();
+ for (Ord<RelDataTypeField> field : Ord.zip(fieldList)) {
+ projects.add(
+ Pair.of(
+ (RexNode) rexBuilder.makeInputRef(
+ field.e.getType(), field.i),
+ field.e.getName()));
+ }
+ projects.addAll(additionalExprs);
+ return RelOptUtil.createProject(input, projects, false);
+ }
+
+ /** Returns an immutable map with the identity [0: 0, .., count-1: count-1]. */
+ static Map<Integer, Integer> identityMap(int count) {
+ ImmutableMap.Builder<Integer, Integer> builder = ImmutableMap.builder();
+ for (int i = 0; i < count; i++) {
+ builder.put(i, i);
+ }
+ return builder.build();
+ }
+
+ /** Registers a relational expression and the relational expression it became
+ * after decorrelation. */
+ Frame register(RelNode rel, RelNode newRel,
+ Map<Integer, Integer> oldToNewOutputPos,
+ SortedMap<Correlation, Integer> corVarToOutputPos) {
+ assert allLessThan(oldToNewOutputPos.keySet(),
+ newRel.getRowType().getFieldCount(), Litmus.THROW);
+ final Frame frame = new Frame(newRel, corVarToOutputPos, oldToNewOutputPos);
+ map.put(rel, frame);
+ return frame;
+ }
+
+ static boolean allLessThan(Collection<Integer> integers, int limit,
+ Litmus ret) {
+ for (int value : integers) {
+ if (value >= limit) {
+ return ret.fail("out of range; value: " + value + ", limit: " + limit);
+ }
+ }
+ return ret.succeed();
+ }
+
+ private static RelNode stripHep(RelNode rel) {
+ if (rel instanceof HepRelVertex) {
+ HepRelVertex hepRelVertex = (HepRelVertex) rel;
+ rel = hepRelVertex.getCurrentRel();
+ }
+ return rel;
+ }
+
+ //~ Inner Classes ----------------------------------------------------------
+
+ /** Shuttle that decorrelates. */
+ private class DecorrelateRexShuttle extends RexShuttle {
+ @Override public RexNode visitFieldAccess(RexFieldAccess fieldAccess) {
+ int newInputOutputOffset = 0;
+ for (RelNode input : currentRel.getInputs()) {
+ final Frame frame = map.get(input);
+
+ if (frame != null) {
+ // try to find in this input rel the position of cor var
+ final Correlation corVar = cm.mapFieldAccessToCorVar.get(fieldAccess);
+
+ if (corVar != null) {
+ Integer newInputPos = frame.corVarOutputPos.get(corVar);
+ if (newInputPos != null) {
+ // This input rel does produce the cor var referenced.
+ // Assume fieldAccess has the correct type info.
+ return new RexInputRef(newInputPos + newInputOutputOffset,
+ fieldAccess.getType());
+ }
+ }
+
+ // this input rel does not produce the cor var needed
+ newInputOutputOffset += frame.r.getRowType().getFieldCount();
+ } else {
+ // this input rel is not rewritten
+ newInputOutputOffset += input.getRowType().getFieldCount();
+ }
+ }
+ return fieldAccess;
+ }
+
+ @Override public RexNode visitInputRef(RexInputRef inputRef) {
+ return getNewForOldInputRef(inputRef);
+ }
+ }
+
+ /** Shuttle that removes correlations. */
+ private class RemoveCorrelationRexShuttle extends RexShuttle {
+ final RexBuilder rexBuilder;
+ final RelDataTypeFactory typeFactory;
+ final boolean projectPulledAboveLeftCorrelator;
+ final RexInputRef nullIndicator;
+ final ImmutableSet<Integer> isCount;
+
+ public RemoveCorrelationRexShuttle(
+ RexBuilder rexBuilder,
+ boolean projectPulledAboveLeftCorrelator,
+ RexInputRef nullIndicator,
+ Set<Integer> isCount) {
+ this.projectPulledAboveLeftCorrelator =
+ projectPulledAboveLeftCorrelator;
+ this.nullIndicator = nullIndicator; // may be null
+ this.isCount = ImmutableSet.copyOf(isCount);
+ this.rexBuilder = rexBuilder;
+ this.typeFactory = rexBuilder.getTypeFactory();
+ }
+
+ private RexNode createCaseExpression(
+ RexInputRef nullInputRef,
+ RexLiteral lit,
+ RexNode rexNode) {
+ RexNode[] caseOperands = new RexNode[3];
+
+ // Construct a CASE expression to handle the null indicator.
+ //
+ // This also covers the case where a left correlated subquery
+ // projects fields from the outer relation. Since LOJ cannot produce
+ // nulls on the LHS, the projection now needs to make a nullable LHS
+ // reference using a nullability indicator. If this indicator
+ // is null, it means the subquery does not produce any value. As a
+ // result, any RHS ref by this subquery needs to produce a null value.
+
+ // WHEN indicator IS NULL
+ caseOperands[0] =
+ rexBuilder.makeCall(
+ SqlStdOperatorTable.IS_NULL,
+ new RexInputRef(
+ nullInputRef.getIndex(),
+ typeFactory.createTypeWithNullability(
+ nullInputRef.getType(),
+ true)));
+
+ // THEN CAST(NULL AS newInputTypeNullable)
+ caseOperands[1] =
+ rexBuilder.makeCast(
+ typeFactory.createTypeWithNullability(
+ rexNode.getType(),
+ true),
+ lit);
+
+ // ELSE cast (newInput AS newInputTypeNullable) END
+ caseOperands[2] =
+ rexBuilder.makeCast(
+ typeFactory.createTypeWithNullability(
+ rexNode.getType(),
+ true),
+ rexNode);
+
+ return rexBuilder.makeCall(
+ SqlStdOperatorTable.CASE,
+ caseOperands);
+ }
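The CASE expression assembled above has straightforward semantics: `CASE WHEN indicator IS NULL THEN lit ELSE value END`, with both branches cast to a nullable type. For most pulled-up projections the literal is NULL; for COUNT outputs (see visitInputRef below) the caller passes literal zero, so a count over an empty correlated subquery yields 0 rather than NULL. A one-line Python model with `None` standing in for SQL NULL (not Hive/Calcite code):

```python
# Illustrative model (not Hive/Calcite code) of the CASE built by
# createCaseExpression: CASE WHEN indicator IS NULL THEN lit ELSE value END.

def case_expression(null_indicator, lit, value):
    # None models SQL NULL: a null indicator selects the literal branch.
    return lit if null_indicator is None else value
```

So `case_expression(None, 0, value)` models the COUNT special case, and `case_expression(None, None, value)` models the ordinary null-enforcing projection.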
+
+ @Override public RexNode visitFieldAccess(RexFieldAccess fieldAccess) {
+ if (cm.mapFieldAccessToCorVar.containsKey(fieldAccess)) {
+ // if it is a corVar, change it to be input ref.
+ Correlation corVar = cm.mapFieldAccessToCorVar.get(fieldAccess);
+
+ // corVar offset should point to the leftInput of currentRel,
+ // which is the Correlator.
+ RexNode newRexNode =
+ new RexInputRef(corVar.field, fieldAccess.getType());
+
+ if (projectPulledAboveLeftCorrelator
+ && (nullIndicator != null)) {
+ // need to enforce nullability by applying an additional
+ // cast operator over the transformed expression.
+ newRexNode =
+ createCaseExpression(
+ nullIndicator,
+ rexBuilder.constantNull(),
+ newRexNode);
+ }
+ return newRexNode;
+ }
+ return fieldAccess;
+ }
+
+ @Override public RexNode visitInputRef(RexInputRef inputRef) {
+ if (currentRel instanceof LogicalCorrelate) {
+ // if this rel references corVar
+ // and now it needs to be rewritten
+ // it must have been pulled above the Correlator
+ // replace the input ref to account for the LHS of the
+ // Correlator
+ final int leftInputFieldCount =
+ ((LogicalCorrelate) currentRel).getLeft().getRowType()
+ .getFieldCount();
+ RelDataType newType = inputRef.getType();
+
+ if (projectPulledAboveLeftCorrelator) {
+ newType =
+ typeFactory.createTypeWithNullability(newType, true);
+ }
+
+ int pos = inputRef.getIndex();
+ RexInputRef newInputRef =
+ new RexInputRef(leftInputFieldCount + pos, newType);
+
+ if ((isCount != null) && isCount.contains(pos)) {
+ return createCaseExpression(
+ newInputRef,
+ rexBuilder.makeExactLiteral(BigDecimal.ZERO),
+ newInputRef);
+ } else {
+ return newInputRef;
+ }
+ }
+ return inputRef;
+ }
+
+ @Override public RexNode visitLiteral(RexLiteral literal) {
+ // Use nullIndicator to decide whether to project null.
+ // Do nothing if the literal is null.
+ if (!RexUtil.isNull(literal)
+ && projectPulledAboveLeftCorrelator
+ && (nullIndicator != null)) {
+ return createCaseExpression(
+ nullIndicator,
+ rexBuilder.constantNull(),
+ literal);
+ }
+ return literal;
+ }
+
+ @Override public RexNode visitCall(final RexCall call) {
+ RexNode newCall;
+
+ boolean[] update = {false};
+ List<RexNode> clonedOperands = visitList(call.operands, update);
+ if (update[0]) {
+ SqlOperator operator = call.getOperator();
+
+ boolean isSpecialCast = false;
+ if (operator instanceof SqlFunction) {
+ SqlFunction function = (SqlFunction) operator;
+ if (function.getKind() == SqlKind.CAST) {
+ if (call.operands.size() < 2) {
+ isSpecialCast = true;
+ }
+ }
+ }
+
+ final RelDataType newType;
+ if (!isSpecialCast) {
+ // TODO: ideally this only needs to be called if the result
+ // type will also change. However, since that requires
+ // support from type inference rules to tell whether a rule
+ // decides return type based on input types, for now all
+ // operators will be recreated with new type if any operand
+ // changed, unless the operator has "built-in" type.
+ newType = rexBuilder.deriveReturnType(operator, clonedOperands);
+ } else {
+ // Use the current return type when creating a new call, for
+ // operators with return type built into the operator
+ // definition, and with no type inference rules, such as
+ // cast function with less than 2 operands.
+
+ // TODO: Comments in RexShuttle.visitCall() mention other
+ // types in this category. Need to resolve those together
+ // and preferably in the base class RexShuttle.
+ newType = call.getType();
+ }
+ newCall =
+ rexBuilder.makeCall(
+ newType,
+ operator,
+ clonedOperands);
+ } else {
+ newCall = call;
+ }
+
+ if (projectPulledAboveLeftCorrelator && (nullIndicator != null)) {
+ return createCaseExpression(
+ nullIndicator,
+ rexBuilder.constantNull(),
+ newCall);
+ }
+ return newCall;
+ }
+ }
+
+ /**
+ * Rule to remove single_value rel. For cases like
+ *
+ * <blockquote><pre>
+ * AggRel single_value
+ *   proj/filter/agg/join on unique LHS key
+ *     AggRel single group
+ * </pre></blockquote>
+ */
+ private final class RemoveSingleAggregateRule extends RelOptRule {
+ public RemoveSingleAggregateRule() {
+ super(
+ operand(
+ LogicalAggregate.class,
+ operand(
+ LogicalProject.class,
+ operand(LogicalAggregate.class, any()))));
+ }
+
+ public void onMatch(RelOptRuleCall call) {
+ LogicalAggregate singleAggregate = call.rel(0);
+ LogicalProject project = call.rel(1);
+ LogicalAggregate aggregate = call.rel(2);
+
+ // check singleAggRel is single_value agg
+ if ((!singleAggregate.getGroupSet().isEmpty())
+ || (singleAggregate.getAggCallList().size() != 1)
+ || !(singleAggregate.getAggCallList().get(0).getAggregation()
+ instanceof SqlSingleValueAggFunction)) {
+ return;
+ }
+
+ // check this project only projects one expression, i.e. scalar
+ // subqueries.
+ List<RexNode> projExprs = project.getProjects();
+ if (projExprs.size() != 1) {
+ return;
+ }
+
+ // check the input to projRel is an aggregate on the entire input
+ if (!aggregate.getGroupSet().isEmpty()) {
+ return;
+ }
+
+ // singleAggRel produces a nullable type, so create the new
+ // projection that casts proj expr to a nullable type.
+ final RelOptCluster cluster = project.getCluster();
+ RelNode newProject =
+ RelOptUtil.createProject(aggregate,
+ ImmutableList.of(
+ rexBuilder.makeCast(
+ cluster.getTypeFactory().createTypeWithNullability(
+ projExprs.get(0).getType(),
+ true),
+ projExprs.get(0))),
+ null);
+ call.transformTo(newProject);
+ }
+ }
+
+ /** Planner rule that removes correlations for scalar projects. */
+ private final class RemoveCorrelationForScalarProjectRule extends RelOptRule {
+ public RemoveCorrelationForScalarProjectRule() {
+ super(
+ operand(LogicalCorrelate.class,
+ operand(RelNode.class, any()),
+ operand(LogicalAggregate.class,
+ operand(LogicalProject.class,
+ operand(RelNode.class, any())))));
+ }
+
+ public void onMatch(RelOptRuleCall call) {
+ final LogicalCorrelate correlate = call.rel(0);
+ final RelNode left = call.rel(1);
+ final LogicalAggregate aggregate = call.rel(2);
+ final LogicalProject project = call.rel(3);
+ RelNode right = call.rel(4);
+ final RelOptCluster cluster = correlate.getCluster();
+
+ setCurrent(call.getPlanner().getRoot(), correlate);
+
+ // Check for this pattern.
+ // The pattern matching could be simplified if rules can be applied
+ // during decorrelation.
+ //
+ // CorrelateRel(left correlation, condition = true)
+ // LeftInputRel
+ // LogicalAggregate (groupby (0) single_value())
+ // LogicalProject-A (may reference corVar)
+ // RightInputRel
+ final JoinRelType joinType = correlate.getJoinType().toJoinType();
+
+ // corRel.getCondition was here, however Correlate was updated so it
+ // never includes a join condition. The code was not modified for brevity.
+ RexNode joinCond = rexBuilder.makeLiteral(true);
+ if ((joinType != JoinRelType.LEFT)
+ || (joinCond != rexBuilder.makeLiteral(true))) {
+ return;
+ }
+
+ // check that the agg is of the following type:
+ // doing a single_value() on the entire input
+ if ((!aggregate.getGroupSet().isEmpty())
+ || (aggregate.getAggCallList().size() != 1)
+ || !(aggregate.getAggCallList().get(0).getAggregation()
+ instanceof SqlSingleValueAggFunction)) {
+ return;
+ }
+
+ // check this project only projects one expression, i.e. scalar
+ // subqueries.
+ if (project.getProjects().size() != 1) {
+ return;
+ }
+
+ int nullIndicatorPos;
+
+ if ((right instanceof LogicalFilter)
+ && cm.mapRefRelToCorVar.containsKey(right)) {
+ // rightInputRel has this shape:
+ //
+ // LogicalFilter (references corvar)
+ // FilterInputRel
+
+ // If rightInputRel is a filter and contains correlated
+ // reference, make sure the correlated keys in the filter
+ // condition forms a unique key of the RHS.
+
+ LogicalFilter filter = (LogicalFilter) right;
+ right = filter.getInput();
+
+ assert right instanceof HepRelVertex;
+ right = ((HepRelVertex) right).getCurrentRel();
+
+ // check filter input contains no correlation
+ if (RelOptUtil.getVariablesUsed(right).size() > 0) {
+ return;
+ }
+
+ // extract the correlation out of the filter
+
+ // First, break up the filter conditions into equality
+ // comparisons between rightJoinKeys (from the original
+ // filterInputRel) and correlatedJoinKeys. correlatedJoinKeys
+ // can be expressions, while rightJoinKeys need to be input
+ // refs. These comparisons are AND'ed together.
+ List<RexNode> tmpRightJoinKeys = Lists.newArrayList();
+ List<RexNode> correlatedJoinKeys = Lists.newArrayList();
+ RelOptUtil.splitCorrelatedFilterCondition(
+ filter,
+ tmpRightJoinKeys,
+ correlatedJoinKeys,
+ false);
+
+ // check that the columns referenced in these comparisons form
+ // a unique key of the filterInputRel
+ final List<RexInputRef> rightJoinKeys = new ArrayList<>();
+ for (RexNode key : tmpRightJoinKeys) {
+ assert key instanceof RexInputRef;
+ rightJoinKeys.add((RexInputRef) key);
+ }
+
+ // check that the columns referenced in rightJoinKeys form a
+ // unique key of the filterInputRel
+ if (rightJoinKeys.isEmpty()) {
+ return;
+ }
+
+ // The join filters out the nulls. So, it's ok if there are
+ // nulls in the join keys.
+ final RelMetadataQuery mq = RelMetadataQuery.instance();
+ if (!RelMdUtil.areColumnsDefinitelyUniqueWhenNullsFiltered(mq, right,
+ rightJoinKeys)) {
+ //SQL2REL_LOGGER.fine(rightJoinKeys.toString()
+ // + "are not unique keys for "
+ // + right.toString());
+ return;
+ }
+
+ RexUtil.FieldAccessFinder visitor =
+ new RexUtil.FieldAccessFinder();
+ RexUtil.apply(visitor, correlatedJoinKeys, null);
+ List<RexFieldAccess> correlatedKeyList =
+ visitor.getFieldAccessList();
+
+ if (!checkCorVars(correlate, project, filter, correlatedKeyList)) {
+ return;
+ }
+
+ // Change the plan to this structure.
+ // Note that the aggregateRel is removed.
+ //
+ // LogicalProject-A' (replace corvar with input ref from the LogicalJoin)
+ // LogicalJoin (replace corvar with input ref from LeftInputRel)
+ // LeftInputRel
+ // RightInputRel (previously FilterInputRel)
+
+ // Change the filter condition into a join condition
+ joinCond =
+ removeCorrelationExpr(filter.getCondition(), false);
+
+ nullIndicatorPos =
+ left.getRowType().getFieldCount()
+ + rightJoinKeys.get(0).getIndex();
+ } else if (cm.mapRefRelToCorVar.containsKey(project)) {
+ // check filter input contains no correlation
+ if (RelOptUtil.getVariablesUsed(right).size() > 0) {
+ return;
+ }
+
+ if (!checkCorVars(correlate, project, null, null)) {
+ return;
+ }
+
+ // Change the plan to this structure.
+ //
+ // LogicalProject-A' (replace corvar with input ref from LogicalJoin)
+ // LogicalJoin (left, condition = true)
+ // LeftInputRel
+ // LogicalAggregate(groupby(0), single_value(0), s_v(1)....)
+ // LogicalProject-B (everything from input plus literal true)
+ // ProjInputRel
+
+ // make the new projRel to provide a null indicator
+ right =
+ createProjectWithAdditionalExprs(right,
+ ImmutableList.of(
+ Pair.<RexNode, String>of(
+ rexBuilder.makeLiteral(true), "nullIndicator")));
+
+ // make the new aggRel
+ right =
+ RelOptUtil.createSingleValueAggRel(cluster, right);
+
+ // The last field:
+ // single_value(true)
+ // is the nullIndicator
+ nullIndicatorPos =
+ left.getRowType().getFieldCount()
+ + right.getRowType().getFieldCount() - 1;
+ } else {
+ return;
+ }
+
+ // make the new join rel
+ LogicalJoin join =
+ LogicalJoin.create(left, right, joinCond,
+ ImmutableSet.<CorrelationId>of(), joinType);
+
+ RelNode newProject =
+ projectJoinOutputWithNullability(join, project, nullIndicatorPos);
+
+ call.transformTo(newProject);
+
+ removeCorVarFromTree(correlate);
+ }
+ }
+
+ /** Planner rule that removes correlations for scalar aggregates. */
+ private final class RemoveCorrelationForScalarAggregateRule
+ extends RelOptRule {
+ public RemoveCorrelationForScalarAggregateRule() {
+ super(
+ operand(LogicalCorrelate.class,
+ operand(RelNode.class, any()),
+ operand(LogicalProject.class,
+ operand(LogicalAggregate.class, null, Aggregate.IS_SIMPLE,
+ operand(LogicalProject.class,
+ operand(RelNode.class, any()))))));
+ }
+
+ public void onMatch(RelOptRuleCall call) {
+ final LogicalCorrelate correlate = call.rel(0);
+ final RelNode left = call.rel(1);
+ final LogicalProject aggOutputProject = call.rel(2);
+ final LogicalAggregate aggregate = call.rel(3);
+ final LogicalProject aggInputProject = call.rel(4);
+ RelNode right = call.rel(5);
+ final RelOptCluster cluster = correlate.getCluster();
+
+ setCurrent(call.getPlanner().getRoot(), correlate);
+
+ // check for this pattern
+ // The pattern matching could be simplified if rules can be applied
+ // during decorrelation.
+ //
+ // CorrelateRel(left correlation, condition = true)
+ // LeftInputRel
+ // LogicalProject-A (a RexNode)
+ // LogicalAggregate (groupby (0), agg0(), agg1()...)
+ // LogicalProject-B (references corVar)
+ // rightInputRel
+
+ // check aggOutputProject projects only one expression
+ final List<RexNode> aggOutputProjects = aggOutputProject.getProjects();
+ if (aggOutputProjects.size() != 1) {
+ return;
+ }
+
+ final JoinRelType joinType = correlate.getJoinType().toJoinType();
+ // corRel.getCondition was here, however Correlate was updated so it
+ // never includes a join condition. The code was not modified for brevity.
+ RexNode joinCond = rexBuilder.makeLiteral(true);
+ if ((joinType != JoinRelType.LEFT)
+ || (joinCond != rexBuilder.makeLiteral(true))) {
+ return;
+ }
+
+ // check that the agg is on the entire input
+ if (!aggregate.getGroupSet().isEmpty()) {
+ return;
+ }
+
+ final List<RexNode> aggInputProjects = aggInputProject.getProjects();
+
+ final List<AggregateCall> aggCalls = aggregate.getAggCallList();
+ final Set<Integer> isCountStar = Sets.newHashSet();
+
+ // mark if agg produces count(*) which needs to reference the
+ // nullIndicator after the transformation.
+ int k = -1;
+ for (AggregateCall aggCall : aggCalls) {
+ ++k;
+ if ((aggCall.getAggregation() instanceof SqlCountAggFunction)
+ && (aggCall.getArgList().size() == 0)) {
+ isCountStar.add(k);
+ }
+ }
+
+ if ((right instanceof LogicalFilter)
+ && cm.mapRefRelToCorVar.containsKey(right)) {
+ // rightInputRel has this shape:
+ //
+ // LogicalFilter (references corvar)
+ // FilterInputRel
+ LogicalFilter filter = (LogicalFilter) right;
+ right = filter.getInput();
+
+ assert right instanceof HepRelVertex;
+ right = ((HepRelVertex) right).getCurrentRel();
+
+ // check filter input contains no correlation
+ if (RelOptUtil.getVariablesUsed(right).size() > 0) {
+ return;
+ }
+
+ // check filter condition type. First extract the correlation out
+ // of the filter.
+
+ // First, break up the filter conditions into equality
+ // comparisons between rightJoinKeys (from the original
+ // filterInputRel) and correlatedJoinKeys. correlatedJoinKeys
+ // can only be RexFieldAccess, while rightJoinKeys can be
+ // expressions. These comparisons are AND'ed together.
+ List<RexNode> rightJoinKeys = Lists.newArrayList();
+ List<RexNode> tmpCorrelatedJoinKeys = Lists.newArrayList();
+ RelOptUtil.splitCorrelatedFilterCondition(
+ filter,
+ rightJoinKeys,
+ tmpCorrelatedJoinKeys,
+ true);
+
+ // make sure th
<TRUNCATED>
[02/21] hive git commit: HIVE-15192 : Use Calcite to de-correlate and plan subqueries (Vineet Garg via Ashutosh Chauhan)
Posted by ha...@apache.org.
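The `.q.out` diffs below show correlated IN subqueries that were previously planned as a single Left Semi Join stage now being decorrelated into separate stages: filter and deduplicate the inner table, then inner-join it with the outer table. As a minimal sketch of why the two forms return the same rows, the plain-Java program below (no Hive or Calcite classes; table contents and the `DecorrelateSketch`/`Row` names are made up for illustration) evaluates `src11.key1 in (select key from src where src11.value1 = value and key > '9')` both ways:

```java
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

public class DecorrelateSketch {
  // A (key, value) pair; record equality is value-based, which stands in
  // for equality of join keys.
  record Row(String key, String value) {}

  // Original correlated form: re-evaluate the subquery for every outer row.
  static List<Row> correlated(List<Row> outer, List<Row> inner) {
    return outer.stream()
        .filter(o -> inner.stream().anyMatch(i ->
            i.key.compareTo("9") > 0          // key > '9'
                && i.value.equals(o.value)    // correlated predicate
                && i.key.equals(o.key)))      // IN membership
        .collect(Collectors.toList());
  }

  // Decorrelated form, mirroring the rewritten stage plans: filter the inner
  // table once, take DISTINCT (key, value), then inner-join on both columns.
  static List<Row> decorrelated(List<Row> outer, List<Row> inner) {
    Set<Row> distinctKeys = inner.stream()
        .filter(i -> i.key.compareTo("9") > 0)
        .collect(Collectors.toSet());         // Group By = distinct
    return outer.stream()
        .filter(o -> distinctKeys.contains(new Row(o.key, o.value)))
        .collect(Collectors.toList());
  }

  public static void main(String[] args) {
    List<Row> src11 = List.of(new Row("90", "v90"), new Row("5", "v5"));
    List<Row> src =
        List.of(new Row("90", "v90"), new Row("91", "v91"), new Row("5", "v5"));
    // Both plans produce the same result set.
    System.out.println(correlated(src11, src).equals(decorrelated(src11, src)));
  }
}
```

The distinct step matters: without it, an outer row matching several inner rows would be duplicated by the inner join, which is why the rewritten plans insert Group By stages before and after the join.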
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out b/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out
index c7e1f02..7385b4c 100644
--- a/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out
+++ b/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out
@@ -41,60 +41,147 @@ POSTHOOK: query: -- non agg, corr
explain select * from src11 where src11.key1 in (select key from src where src11.value1 = value and key > '9')
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-4 is a root stage
+ Stage-2 depends on stages: Stage-4
+ Stage-3 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-3
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-1
+ Stage: Stage-4
Map Reduce
Map Operator Tree:
TableScan
alias: src11
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Filter Operator
- predicate: ((key1 > '9') and value1 is not null) (type: boolean)
+ Select Operator
+ expressions: value1 (type: string)
+ outputColumnNames: value1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: key1 (type: string), value1 (type: string)
- outputColumnNames: _col0, _col1
+ Group By Operator
+ keys: value1 (type: string)
+ mode: hash
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
TableScan
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key > '9') and value is not null) (type: boolean)
+ predicate: (key > '9') (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string), _col1 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Reduce Operator Tree:
Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col2 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src11
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: key1 (type: string), value1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
keys:
0 _col0 (type: string), _col1 (type: string)
1 _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -111,60 +198,147 @@ PREHOOK: type: QUERY
POSTHOOK: query: explain select * from src a where a.key in (select key from src where a.value = value and key > '9')
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-4 is a root stage
+ Stage-2 depends on stages: Stage-4
+ Stage-3 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-3
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-1
+ Stage: Stage-4
Map Reduce
Map Operator Tree:
TableScan
alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((key > '9') and value is not null) (type: boolean)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string)
+ outputColumnNames: value
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: value (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
TableScan
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key > '9') and value is not null) (type: boolean)
+ predicate: (key > '9') (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string), _col1 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col2 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
keys:
0 _col0 (type: string), _col1 (type: string)
1 _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -196,8 +370,11 @@ from part b where b.p_size in
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-2 is a root stage
- Stage-3 depends on stages: Stage-2
- Stage-1 depends on stages: Stage-3
+ Stage-3 depends on stages: Stage-2, Stage-6
+ Stage-4 depends on stages: Stage-3
+ Stage-5 depends on stages: Stage-4
+ Stage-1 depends on stages: Stage-5
+ Stage-6 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
@@ -207,15 +384,12 @@ STAGE PLANS:
TableScan
alias: part2
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Filter Operator
- predicate: p2_mfgr is not null (type: boolean)
+ Reduce Output Operator
+ key expressions: p2_mfgr (type: string), p2_size (type: int)
+ sort order: ++
+ Map-reduce partition columns: p2_mfgr (type: string)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Reduce Output Operator
- key expressions: p2_mfgr (type: string), p2_size (type: int)
- sort order: ++
- Map-reduce partition columns: p2_mfgr (type: string)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- TopN Hash Memory Usage: 0.1
+ TopN Hash Memory Usage: 0.1
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int)
@@ -249,18 +423,12 @@ STAGE PLANS:
expressions: _col2 (type: string), _col5 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Group By Operator
- aggregations: min(_col1)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-3
Map Reduce
@@ -272,21 +440,31 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
value expressions: _col1 (type: int)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0)
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Filter Operator
- predicate: _col1 is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: string), _col1 (type: int)
+ outputColumnNames: _col2, _col1
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- keys: _col0 (type: string), _col1 (type: int)
+ aggregations: min(_col1)
+ keys: _col2 (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -294,38 +472,90 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+ Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
Stage: Stage-1
Map Reduce
Map Operator Tree:
TableScan
alias: b
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (p_mfgr is not null and p_size is not null) (type: boolean)
+ Select Operator
+ expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int)
- outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ key expressions: _col1 (type: string), _col2 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: string), _col2 (type: int)
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: string), _col2 (type: int)
- sort order: ++
- Map-reduce partition columns: _col1 (type: string), _col2 (type: int)
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string)
+ value expressions: _col0 (type: string)
TableScan
Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: int)
+ key expressions: _col1 (type: string), _col0 (type: int)
sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Map-reduce partition columns: _col1 (type: string), _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col1 (type: string), _col2 (type: int)
- 1 _col0 (type: string), _col1 (type: int)
+ 1 _col1 (type: string), _col0 (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
Select Operator
@@ -340,6 +570,39 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_mfgr (type: string)
+ outputColumnNames: p_mfgr
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: p_mfgr (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
Stage: Stage-0
Fetch Operator
limit: -1
@@ -364,8 +627,11 @@ from part b where b.p_size in
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-2 is a root stage
- Stage-3 depends on stages: Stage-2
- Stage-1 depends on stages: Stage-3
+ Stage-3 depends on stages: Stage-2, Stage-6
+ Stage-4 depends on stages: Stage-3
+ Stage-5 depends on stages: Stage-4
+ Stage-1 depends on stages: Stage-5
+ Stage-6 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
@@ -375,15 +641,12 @@ STAGE PLANS:
TableScan
alias: part
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: p_mfgr is not null (type: boolean)
+ Reduce Output Operator
+ key expressions: p_mfgr (type: string), p_size (type: int)
+ sort order: ++
+ Map-reduce partition columns: p_mfgr (type: string)
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: p_mfgr (type: string), p_size (type: int)
- sort order: ++
- Map-reduce partition columns: p_mfgr (type: string)
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- TopN Hash Memory Usage: 0.1
+ TopN Hash Memory Usage: 0.1
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int)
@@ -417,18 +680,12 @@ STAGE PLANS:
expressions: _col2 (type: string), _col5 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: min(_col1)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-3
Map Reduce
@@ -440,27 +697,92 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: string), _col1 (type: int)
+ outputColumnNames: _col2, _col1
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(_col1)
+ keys: _col2 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+ Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Group By Operator
- aggregations: min(VALUE._col0)
- keys: KEY._col0 (type: string)
+ keys: KEY._col0 (type: string), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: _col1 is not null (type: boolean)
- Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string), _col1 (type: int)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-1
Map Reduce
@@ -468,32 +790,29 @@ STAGE PLANS:
TableScan
alias: b
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (p_mfgr is not null and p_size is not null) (type: boolean)
+ Select Operator
+ expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int)
- outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ key expressions: _col1 (type: string), _col2 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: string), _col2 (type: int)
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: string), _col2 (type: int)
- sort order: ++
- Map-reduce partition columns: _col1 (type: string), _col2 (type: int)
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string)
+ value expressions: _col0 (type: string)
TableScan
Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: int)
+ key expressions: _col1 (type: string), _col0 (type: int)
sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
- Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE
+ Map-reduce partition columns: _col1 (type: string), _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col1 (type: string), _col2 (type: int)
- 1 _col0 (type: string), _col1 (type: int)
+ 1 _col1 (type: string), _col0 (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
Select Operator
@@ -508,6 +827,39 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_mfgr (type: string)
+ outputColumnNames: p_mfgr
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: p_mfgr (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
Stage: Stage-0
Fetch Operator
limit: -1
@@ -535,11 +887,46 @@ where b.key in
)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
+ Stage-4 is a root stage
+ Stage-2 depends on stages: Stage-4
+ Stage-3 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-3
Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string)
+ outputColumnNames: value
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: value (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
Stage: Stage-2
Map Reduce
Map Operator Tree:
@@ -547,29 +934,65 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key > '9') and value is not null) (type: boolean)
+ predicate: (key > '9') (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: key (type: string), value (type: string)
- mode: hash
+ Select Operator
+ expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col2 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
Group By Operator
keys: _col0 (type: string), _col1 (type: string)
- mode: hash
+ mode: complete
outputColumnNames: _col0, _col1
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -583,36 +1006,33 @@ STAGE PLANS:
TableScan
alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((key > '9') and value is not null) (type: boolean)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
TableScan
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col0 (type: string), _col1 (type: string)
1 _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -640,8 +1060,11 @@ having count(*) in (select count(*) from src where src.key > '9' and src.value
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1, Stage-3
- Stage-3 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-5
+ Stage-6 is a root stage
+ Stage-3 depends on stages: Stage-6
+ Stage-4 depends on stages: Stage-3
+ Stage-5 depends on stages: Stage-4
Stage-0 depends on stages: Stage-2
STAGE PLANS:
@@ -651,25 +1074,22 @@ STAGE PLANS:
TableScan
alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: value is not null (type: boolean)
+ Select Operator
+ expressions: value (type: string), key (type: string)
+ outputColumnNames: value, key
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: value (type: string), key (type: string)
- outputColumnNames: value, key
+ Group By Operator
+ aggregations: count()
+ keys: value (type: string), key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- keys: value (type: string), key (type: string)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: bigint)
+ value expressions: _col2 (type: bigint)
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
@@ -677,19 +1097,16 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: _col2 is not null (type: boolean)
+ Select Operator
+ expressions: _col1 (type: string), _col0 (type: string), _col2 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: string), _col0 (type: string), _col2 (type: bigint)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-2
Map Reduce
@@ -703,17 +1120,17 @@ STAGE PLANS:
value expressions: _col0 (type: string)
TableScan
Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: bigint)
+ key expressions: _col1 (type: string), _col0 (type: bigint)
sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Map-reduce partition columns: _col1 (type: string), _col0 (type: bigint)
+ Statistics: Num rows: 45 Data size: 479 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col1 (type: string), _col2 (type: bigint)
- 1 _col0 (type: string), _col1 (type: bigint)
+ 1 _col1 (type: string), _col0 (type: bigint)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -724,60 +1141,157 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string), key (type: string)
+ outputColumnNames: value, key
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: value (type: string), key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: string)
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
Stage: Stage-3
Map Reduce
Map Operator Tree:
TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((key > '9') and value is not null) (type: boolean)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: value (type: string), key (type: string)
- outputColumnNames: value, key
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- keys: value (type: string), key (type: string)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: bigint)
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key > '9') (type: boolean)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: string), _col0 (type: string)
+ outputColumnNames: _col2, _col0
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col2 (type: string), _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: bigint)
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col2 (type: bigint)
- outputColumnNames: _col0, _col2
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: _col2 is not null (type: boolean)
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col2 (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string), _col1 (type: bigint)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: string), _col2 (type: bigint)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: bigint)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 45 Data size: 479 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: bigint), _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 45 Data size: 479 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Fetch Operator
@@ -785,7 +1299,6 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join JOIN[26][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
PREHOOK: query: -- non agg, corr
explain
select p_mfgr, b.p_name, p_size
@@ -808,10 +1321,17 @@ where b.p_name not in
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-3 is a root stage
- Stage-4 depends on stages: Stage-3
- Stage-1 depends on stages: Stage-4
- Stage-2 depends on stages: Stage-1, Stage-5
- Stage-5 is a root stage
+ Stage-4 depends on stages: Stage-3, Stage-6
+ Stage-5 depends on stages: Stage-4
+ Stage-1 depends on stages: Stage-5
+ Stage-2 depends on stages: Stage-1, Stage-10
+ Stage-6 is a root stage
+ Stage-7 is a root stage
+ Stage-8 depends on stages: Stage-7, Stage-11
+ Stage-9 depends on stages: Stage-8
+ Stage-10 depends on stages: Stage-9, Stage-12
+ Stage-11 is a root stage
+ Stage-12 is a root stage
Stage-0 depends on stages: Stage-2
STAGE PLANS:
@@ -855,41 +1375,54 @@ STAGE PLANS:
isPivotResult: true
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((rank_window_0 <= 2) and (_col1 is null or _col2 is null)) (type: boolean)
+ predicate: (rank_window_0 <= 2) (type: boolean)
Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
Select Operator
+ expressions: _col2 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-4
Map Reduce
Map Operator Tree:
TableScan
Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (_col0 = 0) (type: boolean)
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: string), _col1 (type: string)
+ outputColumnNames: _col2, _col1
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(), count(_col1)
+ keys: _col2 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -897,6 +1430,30 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), count(VALUE._col1)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
Stage: Stage-1
Map Reduce
Map Operator Tree:
@@ -908,22 +1465,27 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- sort order:
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ value expressions: _col0 (type: string), _col2 (type: int)
TableScan
Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
Reduce Operator Tree:
Join Operator
condition map:
- Inner Join 0 to 1
+ Left Outer Join0 to 1
keys:
- 0
- 1
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 26 Data size: 3381 Basic stats: COMPLETE Column stats: NONE
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5
+ Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -939,39 +1501,73 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 26 Data size: 3381 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: int)
+ Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col4 (type: bigint), _col5 (type: bigint)
TableScan
Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
+ key expressions: _col3 (type: string), _col1 (type: string)
sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ Map-reduce partition columns: _col3 (type: string), _col1 (type: string)
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: boolean)
Reduce Operator Tree:
Join Operator
condition map:
Left Outer Join0 to 1
keys:
0 _col0 (type: string), _col1 (type: string)
- 1 _col0 (type: string), _col1 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col4
- Statistics: Num rows: 28 Data size: 3719 Basic stats: COMPLETE Column stats: NONE
+ 1 _col3 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col8
+ Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: _col4 is null (type: boolean)
- Statistics: Num rows: 14 Data size: 1859 Basic stats: COMPLETE Column stats: NONE
+ predicate: CASE WHEN ((_col4 = 0)) THEN (true) WHEN (_col4 is null) THEN (true) WHEN (_col8 is not null) THEN (false) WHEN (_col0 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (false) ELSE (true) END (type: boolean)
+ Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: string), _col0 (type: string), _col2 (type: int)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 14 Data size: 1859 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 14 Data size: 1859 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Stage: Stage-5
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_mfgr (type: string)
+ outputColumnNames: p_mfgr
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: p_mfgr (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-7
Map Reduce
Map Operator Tree:
TableScan
@@ -1014,7 +1610,7 @@ STAGE PLANS:
predicate: (rank_window_0 <= 2) (type: boolean)
Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col1 (type: string), _col2 (type: string)
+ expressions: _col2 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1024,6 +1620,167 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Stage: Stage-8
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: string), _col2 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-9
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), true (type: boolean)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-10
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: boolean)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-11
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_mfgr (type: string)
+ outputColumnNames: p_mfgr
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: p_mfgr (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-12
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_name (type: string)
+ outputColumnNames: p_name
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: p_name (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
Stage: Stage-0
Fetch Operator
limit: -1
[14/21] hive git commit: HIVE-15192 : Use Calcite to de-correlate and plan subqueries (Vineet Garg via Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientpositive/llap/subquery_in.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/subquery_in.q.out b/ql/src/test/results/clientpositive/llap/subquery_in.q.out
index 321f1cc..e71add5 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_in.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_in.q.out
@@ -23,7 +23,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+ Reducer 4 <- Map 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -31,19 +32,16 @@ STAGE PLANS:
TableScan
alias: src
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (key > '9') (type: boolean)
- Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: string)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
Map 3
@@ -54,20 +52,16 @@ STAGE PLANS:
Filter Operator
predicate: (key > '9') (type: boolean)
Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string)
+ Group By Operator
+ keys: key (type: string)
+ mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
+ Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Reducer 2
@@ -75,19 +69,32 @@ STAGE PLANS:
Reduce Operator Tree:
Merge Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 168 Data size: 29904 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 168 Data size: 29904 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE
Stage: Stage-0
Fetch Operator
@@ -147,7 +154,10 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+ Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
+ Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
+ Reducer 7 <- Map 6 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -155,18 +165,15 @@ STAGE PLANS:
TableScan
alias: b
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: ((key > '9') and value is not null) (type: boolean)
- Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Map 3
@@ -175,22 +182,35 @@ STAGE PLANS:
alias: a
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: ((key > '9') and value is not null) (type: boolean)
+ predicate: (key > '9') (type: boolean)
Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: string), _col1 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: string)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: value (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Reducer 2
@@ -198,19 +218,66 @@ STAGE PLANS:
Reduce Operator Tree:
Merge Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col0 (type: string), _col1 (type: string)
1 _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: string), _col2 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 5
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 7
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE
Stage: Stage-0
Fetch Operator
@@ -278,9 +345,10 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
Reducer 4 <- Map 3 (SIMPLE_EDGE)
Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
+ Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -288,19 +356,16 @@ STAGE PLANS:
TableScan
alias: part
Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: UDFToDouble(p_size) is not null (type: boolean)
+ Select Operator
+ expressions: p_name (type: string), p_size (type: int)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: p_name (type: string), p_size (type: int), UDFToDouble(p_size) (type: double)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 26 Data size: 3458 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col2 (type: double)
- sort order: +
- Map-reduce partition columns: _col2 (type: double)
- Statistics: Num rows: 26 Data size: 3458 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: string), _col1 (type: int)
+ Reduce Output Operator
+ key expressions: UDFToDouble(_col1) (type: double)
+ sort order: +
+ Map-reduce partition columns: UDFToDouble(_col1) (type: double)
+ Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: string), _col1 (type: int)
Execution mode: llap
LLAP IO: no inputs
Map 3
@@ -321,9 +386,9 @@ STAGE PLANS:
Reduce Operator Tree:
Merge Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
- 0 _col2 (type: double)
+ 0 UDFToDouble(_col1) (type: double)
1 _col0 (type: double)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: COMPLETE
@@ -386,19 +451,29 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: _col0 is not null (type: boolean)
+ Group By Operator
+ keys: _col0 (type: double)
+ mode: hash
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: double)
- mode: hash
- outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: double)
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: double)
- sort order: +
- Map-reduce partition columns: _col0 (type: double)
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 6
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: double)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: double)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
Stage: Stage-0
Fetch Operator
@@ -455,9 +530,12 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
Reducer 4 <- Map 3 (SIMPLE_EDGE)
- Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
+ Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
+ Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
+ Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
+ Reducer 9 <- Map 8 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -465,19 +543,16 @@ STAGE PLANS:
TableScan
alias: b
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (p_mfgr is not null and p_size is not null) (type: boolean)
+ Select Operator
+ expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int)
- outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ key expressions: _col1 (type: string), _col2 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: string), _col2 (type: int)
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col1 (type: string), _col2 (type: int)
- sort order: ++
- Map-reduce partition columns: _col1 (type: string), _col2 (type: int)
- Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: string)
+ value expressions: _col0 (type: string)
Execution mode: llap
LLAP IO: no inputs
Map 3
@@ -485,15 +560,29 @@ STAGE PLANS:
TableScan
alias: part
Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: p_mfgr is not null (type: boolean)
+ Reduce Output Operator
+ key expressions: p_mfgr (type: string), p_size (type: int)
+ sort order: ++
+ Map-reduce partition columns: p_mfgr (type: string)
Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 26 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: p_mfgr (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: p_mfgr (type: string), p_size (type: int)
- sort order: ++
- Map-reduce partition columns: p_mfgr (type: string)
- Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE
- TopN Hash Memory Usage: 0.1
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Reducer 2
@@ -501,10 +590,10 @@ STAGE PLANS:
Reduce Operator Tree:
Merge Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col1 (type: string), _col2 (type: int)
- 1 _col0 (type: string), _col1 (type: int)
+ 1 _col1 (type: string), _col0 (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 223 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
@@ -552,41 +641,89 @@ STAGE PLANS:
Select Operator
expressions: _col2 (type: string), _col5 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: min(_col1)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: int)
+ Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: int)
Reducer 5
Execution mode: llap
Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col2 (type: string), _col1 (type: int)
+ outputColumnNames: _col2, _col1
+ Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(_col1)
+ keys: _col2 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: int)
+ Reducer 6
+ Execution mode: llap
+ Reduce Operator Tree:
Group By Operator
aggregations: min(VALUE._col0)
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: _col1 is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: string), _col1 (type: int)
- mode: hash
- outputColumnNames: _col0, _col1
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: int)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 7
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: int), _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: string), _col0 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: string), _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 9
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE
Stage: Stage-0
Fetch Operator
@@ -647,8 +784,10 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
- Reducer 4 <- Map 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+ Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
+ Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
+ Reducer 7 <- Map 6 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -656,18 +795,15 @@ STAGE PLANS:
TableScan
alias: b
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: ((key > '9') and value is not null) (type: boolean)
- Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Map 3
@@ -676,18 +812,35 @@ STAGE PLANS:
alias: a
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: ((key > '9') and value is not null) (type: boolean)
+ predicate: (key > '9') (type: boolean)
Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: key (type: string), value (type: string)
- mode: hash
+ Select Operator
+ expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: string)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: value (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Reducer 2
@@ -695,7 +848,7 @@ STAGE PLANS:
Reduce Operator Tree:
Merge Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col0 (type: string), _col1 (type: string)
1 _col0 (type: string), _col1 (type: string)
@@ -711,6 +864,27 @@ STAGE PLANS:
Reducer 4
Execution mode: llap
Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: string), _col2 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 5
+ Execution mode: llap
+ Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
@@ -718,14 +892,27 @@ STAGE PLANS:
Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: _col0 (type: string), _col1 (type: string)
- mode: hash
+ mode: complete
outputColumnNames: _col0, _col1
- Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 7
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE
Stage: Stage-0
Fetch Operator
@@ -809,45 +996,66 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
- Reducer 6 <- Map 5 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
+ Reducer 7 <- Map 6 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
+ alias: lineitem
+ Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: l_partkey is not null (type: boolean)
+ Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: l_partkey (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 5
+ Map Operator Tree:
+ TableScan
alias: li
Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: ((l_linenumber = 1) and l_partkey is not null and l_orderkey is not null) (type: boolean)
+ predicate: ((l_linenumber = 1) and l_partkey is not null) (type: boolean)
Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col0 (type: int)
+ key expressions: _col1 (type: int)
sort order: +
- Map-reduce partition columns: _col0 (type: int)
+ Map-reduce partition columns: _col1 (type: int)
Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: int), _col2 (type: int)
+ value expressions: _col0 (type: int), _col2 (type: int)
Execution mode: llap
LLAP IO: no inputs
- Map 4
+ Map 6
Map Operator Tree:
TableScan
alias: lineitem
Statistics: Num rows: 100 Data size: 9200 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: ((l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean)
+ predicate: (l_shipmode = 'AIR') (type: boolean)
Statistics: Num rows: 14 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: l_orderkey (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: l_orderkey
+ Statistics: Num rows: 14 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- keys: _col0 (type: int)
+ keys: l_orderkey (type: int)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
@@ -858,44 +1066,37 @@ STAGE PLANS:
Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
- Map 5
- Map Operator Tree:
- TableScan
- alias: lineitem
- Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: l_partkey is not null (type: boolean)
- Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: l_partkey (type: int)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: llap
- LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
Merge Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col1, _col2
- Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+ 1 _col1 (type: int)
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col1 (type: int)
sort order: +
Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col2 (type: int)
- Reducer 3
+ Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col3 (type: int)
+ Reducer 4
Execution mode: llap
Reduce Operator Tree:
Merge Join Operator
@@ -904,32 +1105,32 @@ STAGE PLANS:
keys:
0 _col1 (type: int)
1 _col0 (type: int)
- outputColumnNames: _col2, _col4
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col3
+ Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: _col4 (type: int), _col2 (type: int)
+ expressions: _col0 (type: int), _col3 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 6
+ Reducer 7
Execution mode: llap
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Stage: Stage-0
Fetch Operator
@@ -979,3 +1180,3705 @@ POSTHOOK: Input: default@lineitem
#### A masked pattern was here ####
108570 8571
4297 1798
+PREHOOK: query: --where has multiple conjunctions
+explain select * from part where p_brand <> 'Brand#14' AND p_size IN (select min(p_size) from part p where p.p_type = part.p_type group by p_type) AND p_size <> 340
+PREHOOK: type: QUERY
+POSTHOOK: query: --where has multiple conjunctions
+explain select * from part where p_brand <> 'Brand#14' AND p_size IN (select min(p_size) from part p where p.p_type = part.p_type group by p_type) AND p_size <> 340
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
+ Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+ Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
+ Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
+ Reducer 8 <- Map 7 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: ((p_brand <> 'Brand#14') and (p_size <> 340)) (type: boolean)
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col4 (type: string), _col5 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col4 (type: string), _col5 (type: int)
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: p
+ Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: p_type (type: string), p_size (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: int)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: p_type (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col4 (type: string), _col5 (type: int)
+ 1 _col1 (type: string), _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 12 Data size: 2544 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(_col1)
+ keys: _col2 (type: string), _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 6 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 6 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: int)
+ Reducer 5
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 6 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: int)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: int)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col1 (type: string), _col2 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+ Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 6
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: int), _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: string), _col0 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: string), _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 8
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from part where p_brand <> 'Brand#14' AND p_size IN (select min(p_size) from part p where p.p_type = part.p_type group by p_type) AND p_size <> 340
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select * from part where p_brand <> 'Brand#14' AND p_size IN (select min(p_size) from part p where p.p_type = part.p_type group by p_type) AND p_size <> 340
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ
+110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously
+112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car
+132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even
+144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about
+146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref
+15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu
+155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra
+17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the
+17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve
+191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle
+195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de
+33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful
+40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s
+42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl
+45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful
+48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i
+49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick
+65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr
+78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith
+85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull
+86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully
+90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl
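For readers skimming these golden-file diffs: the recurring shape change (a Left Semi Join replaced by an Inner Join over a grouped subquery keyed on the correlation column) is the Calcite-based de-correlation this patch introduces. A minimal sketch of the rewrite's equivalence in plain SQL follows; the sqlite3 harness and the tiny `part` table contents are illustrative assumptions, not part of the patch or of Hive's actual execution:

```python
import sqlite3

# Illustrative in-memory table standing in for the test's `part` table.
conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE part (p_type TEXT, p_size INTEGER)")
conn.executemany("INSERT INTO part VALUES (?, ?)",
                 [("TIN", 2), ("TIN", 7), ("BRASS", 19), ("BRASS", 19)])

# Original form: correlated IN subquery, conceptually re-evaluated per outer row.
correlated = conn.execute("""
    SELECT p_type, p_size FROM part
    WHERE p_size IN (SELECT MIN(p.p_size) FROM part p
                     WHERE p.p_type = part.p_type GROUP BY p.p_type)
""").fetchall()

# De-correlated form: the subquery becomes a grouped derived table that is
# inner-joined on the correlation key plus the IN comparison column -- the
# same Group By + Inner Join shape visible in the plans above. The GROUP BY
# guarantees at most one match per key, so the join cannot duplicate rows.
decorrelated = conn.execute("""
    SELECT part.p_type, part.p_size
    FROM part
    JOIN (SELECT p_type, MIN(p_size) AS min_size
          FROM part GROUP BY p_type) sq
      ON sq.p_type = part.p_type AND sq.min_size = part.p_size
""").fetchall()

# Both forms select exactly the rows whose size is the per-type minimum.
assert sorted(correlated) == sorted(decorrelated)
```

Both queries return the two ("BRASS", 19) rows and ("TIN", 2) here; the de-correlated form is what lets Hive plan the subquery as ordinary join vertices.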
+PREHOOK: query: --lhs contains non-simple expression
+explain select * from part where (p_size-1) IN (select min(p_size) from part group by p_type)
+PREHOOK: type: QUERY
+POSTHOOK: query: --lhs contains non-simple expression
+explain select * from part where (p_size-1) IN (select min(p_size) from part group by p_type)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+ Reducer 4 <- Map 3 (SIMPLE_EDGE)
+ Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: (_col5 - 1) (type: int)
+ sort order: +
+ Map-reduce partition columns: (_col5 - 1) (type: int)
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: p_type (type: string), p_size (type: int)
+ outputColumnNames: p_type, p_size
+ Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(p_size)
+ keys: p_type (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: int)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 (_col5 - 1) (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 8 Data size: 4952 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 8 Data size: 4952 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: int)
+ outputColumnNames: _col1
+ Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 5
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from part where (p_size-1) IN (select min(p_size) from part group by p_type)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select * from part where (p_size-1) IN (select min(p_size) from part group by p_type)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car
+121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h
+121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h
+146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref
+15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu
+155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra
+17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve
+191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle
+195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de
+86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully
+PREHOOK: query: explain select * from part where (p_partkey*p_size) IN (select min(p_partkey) from part group by p_type)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from part where (p_partkey*p_size) IN (select min(p_partkey) from part group by p_type)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+ Reducer 4 <- Map 3 (SIMPLE_EDGE)
+ Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: (_col0 * _col5) (type: int)
+ sort order: +
+ Map-reduce partition columns: (_col0 * _col5) (type: int)
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: p_type (type: string), p_partkey (type: int)
+ outputColumnNames: p_type, p_partkey
+ Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(p_partkey)
+ keys: p_type (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: int)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 (_col0 * _col5) (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 6 Data size: 3714 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 6 Data size: 3714 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: int)
+ outputColumnNames: _col1
+ Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 5
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from part where (p_partkey*p_size) IN (select min(p_partkey) from part group by p_type)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select * from part where (p_partkey*p_size) IN (select min(p_partkey) from part group by p_type)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s
+PREHOOK: query: --lhs contains non-simple expression, corr
+explain select count(*) as c from part as e where p_size + 100 IN (select p_partkey from part where p_name = e.p_name)
+PREHOOK: type: QUERY
+POSTHOOK: query: --lhs contains non-simple expression, corr
+explain select count(*) as c from part as e where p_size + 100 IN (select p_partkey from part where p_name = e.p_name)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 5 <- Map 4 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+ Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
+ Reducer 8 <- Map 7 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: e
+ Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: p_name (type: string), p_size (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), (_col1 + 100) (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), (_col1 + 100) (type: int)
+ Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: p_partkey (type: int), p_name (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: e
+ Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: p_name (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string), (_col1 + 100) (type: int)
+ 1 _col1 (type: string), _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col2 (type: string), _col0 (type: int)
+ outputColumnNames: _col2, _col0
+ Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col2 (type: string), _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 750 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+ Statistics: Num rows: 6 Data size: 750 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 6
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 750 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: int), _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 750 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: string), _col0 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: string), _col0 (type: int)
+ Statistics: Num rows: 6 Data size: 750 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 8
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) as c from part as e where p_size + 100 IN (select p_partkey from part where p_name = e.p_name)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) as c from part as e where p_size + 100 IN (select p_partkey from part where p_name = e.p_name)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+0
+PREHOOK: query: -- lhs contains udf expression
+explain select * from part where floor(p_retailprice) IN (select floor(min(p_retailprice)) from part group by p_type)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- lhs contains udf expression
+explain select * from part where floor(p_retailprice) IN (select floor(min(p_retailprice)) from part group by p_type)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+ Reducer 4 <- Map 3 (SIMPLE_EDGE)
+ Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: floor(_col7) (type: bigint)
+ sort order: +
+ Map-reduce partition columns: floor(_col7) (type: bigint)
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: p_type (type: string), p_retailprice (type: double)
+ outputColumnN
<TRUNCATED>
[18/21] hive git commit: HIVE-15192 : Use Calcite to de-correlate and plan subqueries (Vineet Garg via Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java
new file mode 100644
index 0000000..5f58aae
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java
@@ -0,0 +1,329 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptRuleOperand;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.CorrelationId;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.core.RelFactories;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rex.LogicVisitor;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexShuttle;
+import org.apache.calcite.rex.RexSubQuery;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.type.SqlTypeName;
+import org.apache.calcite.tools.RelBuilderFactory;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.Pair;
+
+import com.google.common.collect.ImmutableList;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveSubQRemoveRelBuilder;
+
+/**
+ * Planner rule that converts IN, EXISTS and scalar sub-queries into joins.
+ *
+ * <p>NOTE: this rule is replicated from Calcite's SubqueryRemoveRule rather
+ * than reused, because Calcite creates the null literal with the NULL type
+ * while Hive needs it to be properly typed.
+ * TODO: remove this copy once CALCITE-1493 is fixed.
+ *
+ * <p>Sub-queries are represented by {@link RexSubQuery} expressions.
+ *
+ * <p>A sub-query may or may not be correlated. If a sub-query is correlated,
+ * the wrapped {@link RelNode} will contain a {@link RexCorrelVariable} before
+ * the rewrite, and the product of the rewrite will be a {@link Correlate}.
+ * The Correlate can be removed using {@link RelDecorrelator}.
+ */
+public abstract class HiveSubQueryRemoveRule extends RelOptRule{
+
+ public static final HiveSubQueryRemoveRule FILTER =
+ new HiveSubQueryRemoveRule(
+ operand(Filter.class, null, RexUtil.SubQueryFinder.FILTER_PREDICATE,
+ any()),
+ HiveRelFactories.HIVE_BUILDER, "SubQueryRemoveRule:Filter") {
+ public void onMatch(RelOptRuleCall call) {
+ final Filter filter = call.rel(0);
+ //final RelBuilder builder = call.builder();
+ final HiveSubQRemoveRelBuilder builder = new HiveSubQRemoveRelBuilder(null, call.rel(0).getCluster(), null);
+ final RexSubQuery e =
+ RexUtil.SubQueryFinder.find(filter.getCondition());
+ assert e != null;
+ final RelOptUtil.Logic logic =
+ LogicVisitor.find(RelOptUtil.Logic.TRUE,
+ ImmutableList.of(filter.getCondition()), e);
+ builder.push(filter.getInput());
+ final int fieldCount = builder.peek().getRowType().getFieldCount();
+ final RexNode target = apply(e, filter.getVariablesSet(), logic,
+ builder, 1, fieldCount);
+ final RexShuttle shuttle = new ReplaceSubQueryShuttle(e, target);
+ builder.filter(shuttle.apply(filter.getCondition()));
+ builder.project(fields(builder, filter.getRowType().getFieldCount()));
+ call.transformTo(builder.build());
+ }
+ };
+
+ private HiveSubQueryRemoveRule(RelOptRuleOperand operand,
+ RelBuilderFactory relBuilderFactory,
+ String description) {
+ super(operand, relBuilderFactory, description);
+ }
+
+ protected RexNode apply(RexSubQuery e, Set<CorrelationId> variablesSet,
+ RelOptUtil.Logic logic,
+ HiveSubQRemoveRelBuilder builder, int inputCount, int offset) {
+ switch (e.getKind()) {
+ case SCALAR_QUERY:
+ builder.push(e.rel);
+ final RelMetadataQuery mq = RelMetadataQuery.instance();
+ final Boolean unique = mq.areColumnsUnique(builder.peek(),
+ ImmutableBitSet.of());
+ if (unique == null || !unique) {
+ builder.aggregate(builder.groupKey(),
+ builder.aggregateCall(SqlStdOperatorTable.SINGLE_VALUE, false, null,
+ null, builder.field(0)));
+ }
+ builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet);
+ return field(builder, inputCount, offset);
+
+ case IN:
+ case EXISTS:
+ // Most general case, where the left and right keys might have nulls, and
+ // caller requires 3-valued logic return.
+ //
+ // select e.deptno, e.deptno in (select deptno from emp)
+ //
+ // becomes
+ //
+ // select e.deptno,
+ // case
+ // when ct.c = 0 then false
+ // when dt.i is not null then true
+ // when e.deptno is null then null
+ // when ct.ck < ct.c then null
+ // else false
+ // end
+ // from e
+ // left join (
+ // (select count(*) as c, count(deptno) as ck from emp) as ct
+ // cross join (select distinct deptno, true as i from emp)) as dt
+ // on e.deptno = dt.deptno
+ //
+ // If keys are not null we can remove "ct" and simplify to
+ //
+ // select e.deptno,
+ // case
+ // when dt.i is not null then true
+ // else false
+ // end
+ // from e
+ // left join (select distinct deptno, true as i from emp) as dt
+ // on e.deptno = dt.deptno
+ //
+ // We could further simplify to
+ //
+ // select e.deptno,
+ // dt.i is not null
+ // from e
+ // left join (select distinct deptno, true as i from emp) as dt
+ // on e.deptno = dt.deptno
+ //
+ // but have not yet.
+ //
+ // If the logic is TRUE we can just kill the record if the condition
+ // evaluates to FALSE or UNKNOWN. Thus the query simplifies to an inner
+ // join:
+ //
+ // select e.deptno,
+ // true
+ // from e
+ // inner join (select distinct deptno from emp) as dt
+ // on e.deptno = dt.deptno
+ //
+
+ builder.push(e.rel);
+ final List<RexNode> fields = new ArrayList<>();
+ switch (e.getKind()) {
+ case IN:
+ fields.addAll(builder.fields());
+ }
+
+ // First, the cross join
+ switch (logic) {
+ case TRUE_FALSE_UNKNOWN:
+ case UNKNOWN_AS_TRUE:
+ // Since EXISTS/NOT EXISTS are not affected by presence of
+ // null keys we do not need to generate count(*), count(c)
+ if (e.getKind() == SqlKind.EXISTS) {
+ logic = RelOptUtil.Logic.TRUE_FALSE;
+ break;
+ }
+ builder.aggregate(builder.groupKey(),
+ builder.count(false, "c"),
+ builder.aggregateCall(SqlStdOperatorTable.COUNT, false, null, "ck",
+ builder.fields()));
+ builder.as("ct");
+ if (!variablesSet.isEmpty()) {
+ // correlated case: use a LEFT join so inner sides producing zero rows survive
+ builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet);
+ } else {
+ builder.join(JoinRelType.INNER, builder.literal(true), variablesSet);
+ }
+
+ offset += 2;
+ builder.push(e.rel);
+ break;
+ }
+
+ // Now the left join
+ switch (logic) {
+ case TRUE:
+ if (fields.isEmpty()) {
+ builder.project(builder.alias(builder.literal(true), "i"));
+ builder.aggregate(builder.groupKey(0));
+ } else {
+ builder.aggregate(builder.groupKey(fields));
+ }
+ break;
+ default:
+ fields.add(builder.alias(builder.literal(true), "i"));
+ builder.project(fields);
+ builder.distinct();
+ }
+ builder.as("dt");
+ final List<RexNode> conditions = new ArrayList<>();
+ for (Pair<RexNode, RexNode> pair
+ : Pair.zip(e.getOperands(), builder.fields())) {
+ conditions.add(
+ builder.equals(pair.left, RexUtil.shift(pair.right, offset)));
+ }
+ switch (logic) {
+ case TRUE:
+ builder.join(JoinRelType.INNER, builder.and(conditions), variablesSet);
+ return builder.literal(true);
+ }
+ builder.join(JoinRelType.LEFT, builder.and(conditions), variablesSet);
+
+ final List<RexNode> keyIsNulls = new ArrayList<>();
+ for (RexNode operand : e.getOperands()) {
+ if (operand.getType().isNullable()) {
+ keyIsNulls.add(builder.isNull(operand));
+ }
+ }
+ final ImmutableList.Builder<RexNode> operands = ImmutableList.builder();
+ switch (logic) {
+ case TRUE_FALSE_UNKNOWN:
+ case UNKNOWN_AS_TRUE:
+ operands.add(
+ builder.equals(builder.field("ct", "c"), builder.literal(0)),
+ builder.literal(false));
+ // Now that the inner count(*)/count(c) aggregate is joined to the outer
+ // table with a LEFT OUTER JOIN, a correlated inner side that produces no
+ // rows yields NULL counts rather than zero (an inner join with correlated
+ // values discards all rows where the join condition is not true). To detect
+ // the empty-inner case we therefore have to check for a NULL count as well
+ // as a zero count.
+ operands.add(builder.isNull(builder.field("ct", "c")), builder.literal(false));
+ break;
+ }
+ operands.add(builder.isNotNull(builder.field("dt", "i")),
+ builder.literal(true));
+ if (!keyIsNulls.isEmpty()) {
+ // Calcite creates the null literal with the NULL type here, but because
+ // Hive does not support the NULL type it is explicitly typed as BOOLEAN.
+ operands.add(builder.or(keyIsNulls), e.rel.getCluster().getRexBuilder().makeNullLiteral(SqlTypeName.BOOLEAN));
+ // Since we are building a filter condition here, this branch should arguably
+ // return FALSE rather than NULL; it is not clear why Calcite returns NULL.
+ //operands.add(builder.or(keyIsNulls), builder.literal(false));
+ }
+ Boolean b = true;
+ switch (logic) {
+ case TRUE_FALSE_UNKNOWN:
+ b = null;
+ // fall through
+ case UNKNOWN_AS_TRUE:
+ operands.add(
+ builder.call(SqlStdOperatorTable.LESS_THAN,
+ builder.field("ct", "ck"), builder.field("ct", "c")),
+ builder.literal(b));
+ break;
+ }
+ operands.add(builder.literal(false));
+ return builder.call(SqlStdOperatorTable.CASE, operands.build());
+
+ default:
+ throw new AssertionError(e.getKind());
+ }
+ }
+
+ /** Returns a reference to a particular field, by offset, across several
+ * inputs on a {@link RelBuilder}'s stack. */
+ private RexInputRef field(HiveSubQRemoveRelBuilder builder, int inputCount, int offset) {
+ for (int inputOrdinal = 0;;) {
+ final RelNode r = builder.peek(inputCount, inputOrdinal);
+ if (offset < r.getRowType().getFieldCount()) {
+ return builder.field(inputCount, inputOrdinal, offset);
+ }
+ ++inputOrdinal;
+ offset -= r.getRowType().getFieldCount();
+ }
+ }
+
+ /** Returns a list of expressions that project the first {@code fieldCount}
+ * fields of the top input on a {@link RelBuilder}'s stack. */
+ private static List<RexNode> fields(HiveSubQRemoveRelBuilder builder, int fieldCount) {
+ final List<RexNode> projects = new ArrayList<>();
+ for (int i = 0; i < fieldCount; i++) {
+ projects.add(builder.field(i));
+ }
+ return projects;
+ }
+
+ /** Shuttle that replaces occurrences of a given
+ * {@link org.apache.calcite.rex.RexSubQuery} with a replacement
+ * expression. */
+ private static class ReplaceSubQueryShuttle extends RexShuttle {
+ private final RexSubQuery subQuery;
+ private final RexNode replacement;
+
+ public ReplaceSubQueryShuttle(RexSubQuery subQuery, RexNode replacement) {
+ this.subQuery = subQuery;
+ this.replacement = replacement;
+ }
+
+ @Override public RexNode visitSubQuery(RexSubQuery subQuery) {
+ return RexUtil.eq(subQuery, this.subQuery) ? replacement : subQuery;
+ }
+ }
+}
+
// End HiveSubQueryRemoveRule.java
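The long comment block inside the rule above spells out how an IN predicate with nullable keys is rewritten into a left join plus a CASE expression over `ct.c`, `ct.ck` and `dt.i`. As an illustrative aside (not part of the patch), the three-valued semantics that CASE reproduces can be sketched in plain Java; the class and method names here are made up for illustration:

```java
import java.util.List;

/** Sketch of SQL's three-valued IN semantics (illustrative, not Hive code). */
public class ThreeValuedIn {

    /** Returns Boolean.TRUE, Boolean.FALSE, or null for UNKNOWN. */
    public static Boolean eval(Integer lhs, List<Integer> rhs) {
        if (rhs.isEmpty()) {
            return Boolean.FALSE;       // "when ct.c = 0 then false"
        }
        boolean rhsHasNull = false;
        for (Integer v : rhs) {
            if (v == null) {
                rhsHasNull = true;      // detected via "ct.ck < ct.c" in the plan
            } else if (lhs != null && lhs.intValue() == v.intValue()) {
                return Boolean.TRUE;    // "when dt.i is not null then true"
            }
        }
        if (lhs == null || rhsHasNull) {
            return null;                // "when e.deptno is null then null"
        }
        return Boolean.FALSE;           // "else false"
    }
}
```

When the rewrite runs under `Logic.TRUE` (a plain WHERE clause), UNKNOWN and FALSE both drop the row, which is why the rule can simplify to an inner join in that case.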
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
index d494c9f..73a9b0f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
@@ -266,6 +266,11 @@ public class HiveOpConverter {
TableScanOperator ts = (TableScanOperator) OperatorFactory.get(
semanticAnalyzer.getOpContext(), tsd, new RowSchema(colInfos));
+ // Now that Calcite processes subqueries, the plan may contain more than
+ // one TableScan with the same alias; append a counter to keep them unique.
+ if (topOps.get(tableAlias) != null) {
+ tableAlias = tableAlias + this.uniqueCounter;
+ }
topOps.put(tableAlias, ts);
if (LOG.isDebugEnabled()) {
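The HiveOpConverter hunk above disambiguates duplicate table aliases by appending a counter before registering the TableScan. A minimal standalone sketch of the same idea follows; the class name, the increment placement, and the use of `Object` values are illustrative assumptions, not Hive's actual implementation:

```java
import java.util.Map;

/** Standalone sketch of duplicate-alias handling (names are illustrative). */
public class AliasUniquifier {
    private int uniqueCounter = 0;

    /** Appends a fresh counter value when the alias already maps to an operator. */
    public String uniquify(String tableAlias, Map<String, Object> topOps) {
        if (topOps.get(tableAlias) != null) {
            tableAlias = tableAlias + (++uniqueCounter);
        }
        return tableAlias;
    }
}
```

With the map already containing `part`, a call returns `part1`, so both scans of the same table can coexist as distinct keys in `topOps`.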
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
index 679cec1..d36eb0b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
@@ -32,12 +32,14 @@ import org.apache.calcite.avatica.util.TimeUnit;
import org.apache.calcite.avatica.util.TimeUnitRange;
import org.apache.calcite.plan.RelOptCluster;
import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.CorrelationId;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.rex.RexSubQuery;
import org.apache.calcite.sql.SqlCollation;
import org.apache.calcite.sql.SqlIntervalQualifier;
import org.apache.calcite.sql.SqlKind;
@@ -54,6 +56,7 @@ import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature;
@@ -68,6 +71,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeSubQueryDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseBinary;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare;
@@ -117,22 +121,43 @@ public class RexNodeConverter {
private final ImmutableList<InputCtx> inputCtxs;
private final boolean flattenExpr;
+ //outerRR belongs to outer query and is required to resolve correlated references
+ private final RowResolver outerRR;
+ private final ImmutableMap<String, Integer> outerNameToPosMap;
+ private int correlatedId;
+
//Constructor used by HiveRexExecutorImpl
public RexNodeConverter(RelOptCluster cluster) {
this(cluster, new ArrayList<InputCtx>(), false);
}
+ //subqueries will need outer query's row resolver
+ public RexNodeConverter(RelOptCluster cluster, RelDataType inpDataType,
+ ImmutableMap<String, Integer> outerNameToPosMap,
+ ImmutableMap<String, Integer> nameToPosMap, RowResolver hiveRR, RowResolver outerRR, int offset, boolean flattenExpr, int correlatedId) {
+ this.cluster = cluster;
+ this.inputCtxs = ImmutableList.of(new InputCtx(inpDataType, nameToPosMap, hiveRR , offset));
+ this.flattenExpr = flattenExpr;
+ this.outerRR = outerRR;
+ this.outerNameToPosMap = outerNameToPosMap;
+ this.correlatedId = correlatedId;
+ }
+
public RexNodeConverter(RelOptCluster cluster, RelDataType inpDataType,
ImmutableMap<String, Integer> nameToPosMap, int offset, boolean flattenExpr) {
this.cluster = cluster;
this.inputCtxs = ImmutableList.of(new InputCtx(inpDataType, nameToPosMap, null, offset));
this.flattenExpr = flattenExpr;
+ this.outerRR = null;
+ this.outerNameToPosMap = null;
}
public RexNodeConverter(RelOptCluster cluster, List<InputCtx> inpCtxLst, boolean flattenExpr) {
this.cluster = cluster;
this.inputCtxs = ImmutableList.<InputCtx> builder().addAll(inpCtxLst).build();
this.flattenExpr = flattenExpr;
+ this.outerRR = null;
+ this.outerNameToPosMap = null;
}
public RexNode convert(ExprNodeDesc expr) throws SemanticException {
@@ -144,12 +169,42 @@ public class RexNodeConverter {
return convert((ExprNodeColumnDesc) expr);
} else if (expr instanceof ExprNodeFieldDesc) {
return convert((ExprNodeFieldDesc) expr);
+ } else if(expr instanceof ExprNodeSubQueryDesc) {
+ return convert((ExprNodeSubQueryDesc) expr);
} else {
throw new RuntimeException("Unsupported Expression");
}
// TODO: handle ExprNodeColumnListDesc
}
+ private RexNode convert(final ExprNodeSubQueryDesc subQueryDesc) throws SemanticException {
+ if(subQueryDesc.getType() == ExprNodeSubQueryDesc.SubqueryType.IN) {
+ /*
+ * Check.5.h :: For In and Not In the SubQuery must implicitly or
+ * explicitly only contain one select item.
+ */
+ if(subQueryDesc.getRexSubQuery().getRowType().getFieldCount() > 1) {
+ throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(
+ "SubQuery can contain only 1 item in Select List."));
+ }
+ //create RexNode for LHS
+ RexNode rexNodeLhs = convert(subQueryDesc.getSubQueryLhs());
+
+ //create RexSubQuery node
+ RexNode rexSubQuery = RexSubQuery.in(subQueryDesc.getRexSubQuery(),
+ ImmutableList.<RexNode>of(rexNodeLhs) );
+ return rexSubQuery;
+ }
+ else if( subQueryDesc.getType() == ExprNodeSubQueryDesc.SubqueryType.EXISTS) {
+ RexNode subQueryNode = RexSubQuery.exists(subQueryDesc.getRexSubQuery());
+ return subQueryNode;
+ }
+ else {
+ throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(
+ "Currently only IN and EXISTS type of subqueries are supported"));
+ }
+ }
+
private RexNode convert(final ExprNodeFieldDesc fieldDesc) throws SemanticException {
RexNode rexNode = convert(fieldDesc.getDesc());
if (rexNode.getType().isStruct()) {
@@ -420,7 +475,7 @@ public class RexNodeConverter {
private InputCtx getInputCtx(ExprNodeColumnDesc col) throws SemanticException {
InputCtx ctxLookingFor = null;
- if (inputCtxs.size() == 1) {
+ if (inputCtxs.size() == 1 && inputCtxs.get(0).hiveRR == null) {
ctxLookingFor = inputCtxs.get(0);
} else {
String tableAlias = col.getTabAlias();
@@ -443,7 +498,21 @@ public class RexNodeConverter {
}
protected RexNode convert(ExprNodeColumnDesc col) throws SemanticException {
+ // if this is correlated we need to make a RexCorrelVariable (with id and type)
+ // id and type should be retrieved from outerRR
InputCtx ic = getInputCtx(col);
+ if(ic == null) {
+ // we have correlated column, build data type from outer rr
+ RelDataType rowType = TypeConverter.getType(cluster, this.outerRR, null);
+ if (this.outerNameToPosMap.get(col.getColumn()) == null) {
+ throw new SemanticException(ErrorMsg.INVALID_COLUMN_NAME.getMsg(col.getColumn()));
+ }
+
+ int pos = this.outerNameToPosMap.get(col.getColumn());
+ CorrelationId colCorr = new CorrelationId(this.correlatedId);
+ RexNode corExpr = cluster.getRexBuilder().makeCorrel(rowType, colCorr);
+ return cluster.getRexBuilder().makeFieldAccess(corExpr, pos);
+ }
int pos = ic.hiveNameToPosMap.get(col.getColumn());
return cluster.getRexBuilder().makeInputRef(
ic.calciteInpDataType.getFieldList().get(pos).getType(), pos + ic.offsetInCalciteSchema);
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 07155fd..278f5b6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -24,9 +24,11 @@ import java.lang.reflect.UndeclaredThrowableException;
import java.math.BigDecimal;
import java.util.AbstractMap.SimpleEntry;
import java.util.ArrayList;
+import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Collections;
+import java.util.Deque;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
@@ -109,6 +111,7 @@ import org.apache.calcite.util.CompositeList;
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.calcite.util.ImmutableIntList;
import org.apache.calcite.util.Pair;
+import org.apache.commons.lang.mutable.MutableBoolean;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.Constants;
import org.apache.hadoop.hive.conf.HiveConf;
@@ -136,6 +139,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvider;
import org.apache.hadoop.hive.ql.optimizer.calcite.HivePlannerContext;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelDecorrelator;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRexExecutorImpl;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl;
import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
@@ -192,6 +196,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortMergeRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortProjectTransposeRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortRemoveRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortUnionReduceRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSubQueryRemoveRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveUnionPullUpConstantsRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveWindowingFixRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializedViewFilterScanRule;
@@ -208,7 +213,6 @@ import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderSpec;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionExpression;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionSpec;
import org.apache.hadoop.hive.ql.parse.QBExpr.Opcode;
-import org.apache.hadoop.hive.ql.parse.QBSubQuery.SubQueryType;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.BoundarySpec;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.RangeBoundarySpec;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowExpressionSpec;
@@ -242,6 +246,7 @@ import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableList.Builder;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
+import com.google.common.math.IntMath;
public class CalcitePlanner extends SemanticAnalyzer {
@@ -332,6 +337,10 @@ public class CalcitePlanner extends SemanticAnalyzer {
boolean reAnalyzeAST = false;
final boolean materializedView = getQB().isMaterializedView();
+ // currently semi-join optimization doesn't work with subqueries
+ // so this will be turned off if we find subqueries and will later be
+ // restored to its original state
+ boolean originalSemiOptVal = this.conf.getBoolVar(ConfVars.SEMIJOIN_CONVERSION);
try {
if (this.conf.getBoolVar(HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP)) {
sinkOp = getOptimizedHiveOPDag();
@@ -431,6 +440,8 @@ public class CalcitePlanner extends SemanticAnalyzer {
super.genResolvedParseTree(ast, new PlannerContext());
skipCalcitePlan = true;
}
+ // restore semi-join opt flag
+ this.conf.setBoolVar(ConfVars.SEMIJOIN_CONVERSION, originalSemiOptVal);
}
} else {
this.ctx.setCboInfo("Plan not optimized by CBO.");
@@ -1000,6 +1011,10 @@ public class CalcitePlanner extends SemanticAnalyzer {
private final ColumnAccessInfo columnAccessInfo;
private Map<HiveProject, Table> viewProjectToTableSchema;
+ // correlated vars across subqueries within the same query need to have different IDs
+ // this will be used in RexNodeConverter to create cor var
+ private int subqueryId;
+
// TODO: Do we need to keep track of RR, ColNameToPosMap for every op or
// just last one.
LinkedHashMap<RelNode, RowResolver> relToHiveRR = new LinkedHashMap<RelNode, RowResolver>();
@@ -1015,6 +1030,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
RelNode calciteGenPlan = null;
RelNode calcitePreCboPlan = null;
RelNode calciteOptimizedPlan = null;
+ subqueryId = -1;
/*
* recreate cluster, so that it picks up the additional traitDef
@@ -1038,7 +1054,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
// 1. Gen Calcite Plan
perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
try {
- calciteGenPlan = genLogicalPlan(getQB(), true);
+ calciteGenPlan = genLogicalPlan(getQB(), true, null, null);
resultSchema = SemanticAnalyzer.convertRowSchemaToResultSetSchema(
relToHiveRR.get(calciteGenPlan),
HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES));
@@ -1062,6 +1078,15 @@ public class CalcitePlanner extends SemanticAnalyzer {
// Create executor
Executor executorProvider = new HiveRexExecutorImpl(optCluster);
+ //Remove subquery
+ LOG.debug("Plan before removing subquery:\n" + RelOptUtil.toString(calciteGenPlan));
+ calciteGenPlan = hepPlan(calciteGenPlan, false, mdProvider.getMetadataProvider(), null,
+ HiveSubQueryRemoveRule.FILTER);
+ LOG.debug("Plan just after removing subquery:\n" + RelOptUtil.toString(calciteGenPlan));
+
+ calciteGenPlan = HiveRelDecorrelator.decorrelateQuery(calciteGenPlan);
+ LOG.debug("Plan after decorrelation:\n" + RelOptUtil.toString(calciteGenPlan));
+
// 2. Apply pre-join order optimizations
calcitePreCboPlan = applyPreJoinOrderingTransforms(calciteGenPlan,
mdProvider.getMetadataProvider(), executorProvider);
@@ -2018,8 +2043,10 @@ public class CalcitePlanner extends SemanticAnalyzer {
}
private RelNode genFilterRelNode(ASTNode filterExpr, RelNode srcRel,
+ ImmutableMap<String, Integer> outerNameToPosMap, RowResolver outerRR,
boolean useCaching) throws SemanticException {
- ExprNodeDesc filterCondn = genExprNodeDesc(filterExpr, relToHiveRR.get(srcRel), useCaching);
+ ExprNodeDesc filterCondn = genExprNodeDesc(filterExpr, relToHiveRR.get(srcRel),
+ outerRR, null, useCaching);
if (filterCondn instanceof ExprNodeConstantDesc
&& !filterCondn.getTypeString().equals(serdeConstants.BOOLEAN_TYPE_NAME)) {
// queries like select * from t1 where 'foo';
@@ -2035,7 +2062,8 @@ public class CalcitePlanner extends SemanticAnalyzer {
ImmutableMap<String, Integer> hiveColNameCalcitePosMap = this.relToHiveColNameCalcitePosMap
.get(srcRel);
RexNode convertedFilterExpr = new RexNodeConverter(cluster, srcRel.getRowType(),
- hiveColNameCalcitePosMap, 0, true).convert(filterCondn);
+ outerNameToPosMap, hiveColNameCalcitePosMap, relToHiveRR.get(srcRel), outerRR,
+ 0, true, subqueryId).convert(filterCondn);
RexNode factoredFilterExpr = RexUtil
.pullFactors(cluster.getRexBuilder(), convertedFilterExpr);
RelNode filterRel = new HiveFilter(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
@@ -2047,66 +2075,46 @@ public class CalcitePlanner extends SemanticAnalyzer {
return filterRel;
}
- private RelNode genFilterRelNode(QB qb, ASTNode searchCond, RelNode srcRel,
- Map<String, RelNode> aliasToRel, boolean forHavingClause) throws SemanticException {
- /*
- * Handle Subquery predicates.
- *
- * Notes (8/22/14 hb): Why is this a copy of the code from {@link
- * #genFilterPlan} - for now we will support the same behavior as non CBO
- * route. - but plan to allow nested SubQueries(Restriction.9.m) and
- * multiple SubQuery expressions(Restriction.8.m). This requires use to
- * utilize Calcite's Decorrelation mechanics, and for Calcite to fix/flush
- * out Null semantics(CALCITE-373) - besides only the driving code has
- * been copied. Most of the code which is SubQueryUtils and QBSubQuery is
- * reused.
- */
- int numSrcColumns = srcRel.getRowType().getFieldCount();
- List<ASTNode> subQueriesInOriginalTree = SubQueryUtils.findSubQueries(searchCond);
- if (subQueriesInOriginalTree.size() > 0) {
+ private void subqueryRestritionCheck(QB qb, ASTNode searchCond, RelNode srcRel,
+ boolean forHavingClause, Map<String, RelNode> aliasToRel ) throws SemanticException {
+ List<ASTNode> subQueriesInOriginalTree = SubQueryUtils.findSubQueries(searchCond);
+ if (subQueriesInOriginalTree.size() > 0) {
/*
* Restriction.9.m :: disallow nested SubQuery expressions.
*/
- if (qb.getSubQueryPredicateDef() != null) {
- throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
- subQueriesInOriginalTree.get(0), "Nested SubQuery expressions are not supported."));
- }
+ if (qb.getSubQueryPredicateDef() != null) {
+ throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
+ subQueriesInOriginalTree.get(0),
+ "Nested SubQuery expressions are not supported."));
+ }
/*
* Restriction.8.m :: We allow only 1 SubQuery expression per Query.
*/
- if (subQueriesInOriginalTree.size() > 1) {
+ if (subQueriesInOriginalTree.size() > 1) {
- throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
- subQueriesInOriginalTree.get(1), "Only 1 SubQuery expression is supported."));
- }
-
- /*
- * Clone the Search AST; apply all rewrites on the clone.
- */
- ASTNode clonedSearchCond = (ASTNode) SubQueryUtils.adaptor.dupTree(searchCond);
- List<ASTNode> subQueries = SubQueryUtils.findSubQueries(clonedSearchCond);
-
- RowResolver inputRR = relToHiveRR.get(srcRel);
- RowResolver outerQBRR = inputRR;
- ImmutableMap<String, Integer> outerQBPosMap = relToHiveColNameCalcitePosMap.get(srcRel);
-
- for (int i = 0; i < subQueries.size(); i++) {
- ASTNode subQueryAST = subQueries.get(i);
- ASTNode originalSubQueryAST = subQueriesInOriginalTree.get(i);
+ throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
+ subQueriesInOriginalTree.get(1), "Only 1 SubQuery expression is supported."));
+ }
+ // we do not care about the AST transformation or rewriting that the
+ // following statements do; we only care about the restriction checks they perform.
+ // We plan to get rid of these restrictions later
int sqIdx = qb.incrNumSubQueryPredicates();
+ ASTNode originalSubQueryAST = subQueriesInOriginalTree.get(0);
+
+ ASTNode clonedSearchCond = (ASTNode) SubQueryUtils.adaptor.dupTree(searchCond);
+ List<ASTNode> subQueries = SubQueryUtils.findSubQueries(clonedSearchCond);
+ ASTNode subQueryAST = subQueries.get(0);
clonedSearchCond = SubQueryUtils.rewriteParentQueryWhere(clonedSearchCond, subQueryAST);
QBSubQuery subQuery = SubQueryUtils.buildSubQuery(qb.getId(), sqIdx, subQueryAST,
- originalSubQueryAST, ctx);
+ originalSubQueryAST, ctx);
+
+ RowResolver inputRR = relToHiveRR.get(srcRel);
- if (!forHavingClause) {
- qb.setWhereClauseSubQueryPredicate(subQuery);
- } else {
- qb.setHavingClauseSubQueryPredicate(subQuery);
- }
String havingInputAlias = null;
if (forHavingClause) {
@@ -2115,78 +2123,78 @@ public class CalcitePlanner extends SemanticAnalyzer {
}
subQuery.validateAndRewriteAST(inputRR, forHavingClause, havingInputAlias,
- aliasToRel.keySet());
-
- QB qbSQ = new QB(subQuery.getOuterQueryId(), subQuery.getAlias(), true);
- qbSQ.setSubQueryDef(subQuery.getSubQuery());
- Phase1Ctx ctx_1 = initPhase1Ctx();
- doPhase1(subQuery.getSubQueryAST(), qbSQ, ctx_1, null);
- getMetaData(qbSQ);
- RelNode subQueryRelNode = genLogicalPlan(qbSQ, false);
- aliasToRel.put(subQuery.getAlias(), subQueryRelNode);
- RowResolver sqRR = relToHiveRR.get(subQueryRelNode);
-
- /*
- * Check.5.h :: For In and Not In the SubQuery must implicitly or
- * explicitly only contain one select item.
- */
- if (subQuery.getOperator().getType() != SubQueryType.EXISTS
- && subQuery.getOperator().getType() != SubQueryType.NOT_EXISTS
- && sqRR.getColumnInfos().size() - subQuery.getNumOfCorrelationExprsAddedToSQSelect() > 1) {
- throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(subQueryAST,
- "SubQuery can contain only 1 item in Select List."));
- }
-
- /*
- * If this is a Not In SubQuery Predicate then Join in the Null Check
- * SubQuery. See QBSubQuery.NotInCheck for details on why and how this
- * is constructed.
- */
- if (subQuery.getNotInCheck() != null) {
- QBSubQuery.NotInCheck notInCheck = subQuery.getNotInCheck();
- notInCheck.setSQRR(sqRR);
- QB qbSQ_nic = new QB(subQuery.getOuterQueryId(), notInCheck.getAlias(), true);
- qbSQ_nic.setSubQueryDef(notInCheck.getSubQuery());
- ctx_1 = initPhase1Ctx();
- doPhase1(notInCheck.getSubQueryAST(), qbSQ_nic, ctx_1, null);
- getMetaData(qbSQ_nic);
- RelNode subQueryNICRelNode = genLogicalPlan(qbSQ_nic, false);
- aliasToRel.put(notInCheck.getAlias(), subQueryNICRelNode);
- srcRel = genJoinRelNode(srcRel, subQueryNICRelNode,
- // set explicitly to inner until we figure out SemiJoin use
- // notInCheck.getJoinType(),
- JoinType.INNER, notInCheck.getJoinConditionAST());
- inputRR = relToHiveRR.get(srcRel);
- if (forHavingClause) {
- aliasToRel.put(havingInputAlias, srcRel);
- }
- }
-
- /*
- * Gen Join between outer Operator and SQ op
- */
- subQuery.buildJoinCondition(inputRR, sqRR, forHavingClause, havingInputAlias);
- srcRel = genJoinRelNode(srcRel, subQueryRelNode, subQuery.getJoinType(),
- subQuery.getJoinConditionAST());
- searchCond = subQuery.updateOuterQueryFilter(clonedSearchCond);
-
- srcRel = genFilterRelNode(searchCond, srcRel, forHavingClause);
-
- /*
- * For Not Exists and Not In, add a projection on top of the Left
- * Outer Join.
- */
- if (subQuery.getOperator().getType() != SubQueryType.NOT_EXISTS
- || subQuery.getOperator().getType() != SubQueryType.NOT_IN) {
- srcRel = projectLeftOuterSide(srcRel, numSrcColumns);
+ aliasToRel.keySet());
+
+ // Missing Check: Check.5.h :: For In and Not In the SubQuery must implicitly or
+ // explicitly only contain one select item.
+ }
+ }
+ private boolean genSubQueryRelNode(QB qb, ASTNode node, RelNode srcRel, boolean forHavingClause,
+ Map<ASTNode, RelNode> subQueryToRelNode,
+ Map<String, RelNode> aliasToRel) throws SemanticException {
+
+ // disallow subqueries which Hive doesn't currently support
+ subqueryRestritionCheck(qb, node, srcRel, forHavingClause, aliasToRel);
+ Deque<ASTNode> stack = new ArrayDeque<ASTNode>();
+ stack.push(node);
+
+ boolean isSubQuery = false;
+
+ while (!stack.isEmpty()) {
+ ASTNode next = stack.pop();
+
+ switch(next.getType()) {
+ case HiveParser.TOK_SUBQUERY_EXPR:
+ String sbQueryAlias = "sq_" + qb.incrNumSubQueryPredicates();
+ QB qbSQ = new QB(qb.getId(), sbQueryAlias, true);
+ Phase1Ctx ctx1 = initPhase1Ctx();
+ doPhase1((ASTNode)next.getChild(1), qbSQ, ctx1, null);
+ getMetaData(qbSQ);
+ subqueryId++;
+ RelNode subQueryRelNode = genLogicalPlan(qbSQ, false, relToHiveColNameCalcitePosMap.get(srcRel),
+ relToHiveRR.get(srcRel));
+ subQueryToRelNode.put(next, subQueryRelNode);
+ isSubQuery = true;
+ break;
+ default:
+ int childCount = next.getChildCount();
+ for(int i = childCount - 1; i >= 0; i--) {
+ stack.push((ASTNode) next.getChild(i));
+ }
}
- }
- relToHiveRR.put(srcRel, outerQBRR);
- relToHiveColNameCalcitePosMap.put(srcRel, outerQBPosMap);
- return srcRel;
}
-
- return genFilterRelNode(searchCond, srcRel, forHavingClause);
+ return isSubQuery;
+ }
+ private RelNode genFilterRelNode(QB qb, ASTNode searchCond, RelNode srcRel,
+ Map<String, RelNode> aliasToRel, ImmutableMap<String, Integer> outerNameToPosMap,
+ RowResolver outerRR, boolean forHavingClause) throws SemanticException {
+
+ Map<ASTNode, RelNode> subQueryToRelNode = new HashMap<>();
+ boolean isSubQuery = genSubQueryRelNode(qb, searchCond, srcRel, forHavingClause,
+ subQueryToRelNode, aliasToRel);
+ if(isSubQuery) {
+ ExprNodeDesc subQueryExpr = genExprNodeDesc(searchCond, relToHiveRR.get(srcRel),
+ outerRR, subQueryToRelNode, forHavingClause);
+
+ ImmutableMap<String, Integer> hiveColNameCalcitePosMap = this.relToHiveColNameCalcitePosMap
+ .get(srcRel);
+ RexNode convertedFilterLHS = new RexNodeConverter(cluster, srcRel.getRowType(),
+ outerNameToPosMap, hiveColNameCalcitePosMap, relToHiveRR.get(srcRel),
+ outerRR, 0, true, subqueryId).convert(subQueryExpr);
+
+ RelNode filterRel = new HiveFilter(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
+ srcRel, convertedFilterLHS);
+
+ this.relToHiveColNameCalcitePosMap.put(filterRel, this.relToHiveColNameCalcitePosMap
+ .get(srcRel));
+ relToHiveRR.put(filterRel, relToHiveRR.get(srcRel));
+
+ // semi-join opt doesn't work with subqueries
+ conf.setBoolVar(ConfVars.SEMIJOIN_CONVERSION, false);
+ return filterRel;
+ } else {
+ return genFilterRelNode(searchCond, srcRel, outerNameToPosMap, outerRR, forHavingClause);
+ }
}
private RelNode projectLeftOuterSide(RelNode srcRel, int numColumns) throws SemanticException {
@@ -2213,14 +2221,15 @@ public class CalcitePlanner extends SemanticAnalyzer {
}
private RelNode genFilterLogicalPlan(QB qb, RelNode srcRel, Map<String, RelNode> aliasToRel,
- boolean forHavingClause) throws SemanticException {
+ ImmutableMap<String, Integer> outerNameToPosMap, RowResolver outerRR,
+ boolean forHavingClause) throws SemanticException {
RelNode filterRel = null;
Iterator<ASTNode> whereClauseIterator = getQBParseInfo(qb).getDestToWhereExpr().values()
.iterator();
if (whereClauseIterator.hasNext()) {
filterRel = genFilterRelNode(qb, (ASTNode) whereClauseIterator.next().getChild(0), srcRel,
- aliasToRel, forHavingClause);
+ aliasToRel, outerNameToPosMap, outerRR, forHavingClause);
}
return filterRel;
@@ -3521,7 +3530,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
private RelNode genLogicalPlan(QBExpr qbexpr) throws SemanticException {
switch (qbexpr.getOpcode()) {
case NULLOP:
- return genLogicalPlan(qbexpr.getQB(), false);
+ return genLogicalPlan(qbexpr.getQB(), false, null, null);
case UNION:
case INTERSECT:
case INTERSECTALL:
@@ -3536,7 +3545,9 @@ public class CalcitePlanner extends SemanticAnalyzer {
}
}
- private RelNode genLogicalPlan(QB qb, boolean outerMostQB) throws SemanticException {
+ private RelNode genLogicalPlan(QB qb, boolean outerMostQB,
+ ImmutableMap<String, Integer> outerNameToPosMap,
+ RowResolver outerRR) throws SemanticException {
RelNode srcRel = null;
RelNode filterRel = null;
RelNode gbRel = null;
@@ -3609,7 +3620,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
}
// 2. Build Rel for where Clause
- filterRel = genFilterLogicalPlan(qb, srcRel, aliasToRel, false);
+ filterRel = genFilterLogicalPlan(qb, srcRel, aliasToRel, outerNameToPosMap, outerRR, false);
srcRel = (filterRel == null) ? srcRel : filterRel;
RelNode starSrcRel = srcRel;
@@ -3717,7 +3728,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
// Special handling of grouping function
targetNode = rewriteGroupingFunctionAST(getGroupByForClause(qbp, destClauseName), targetNode);
}
- gbFilter = genFilterRelNode(qb, targetNode, srcRel, aliasToRel, true);
+ gbFilter = genFilterRelNode(qb, targetNode, srcRel, aliasToRel, null, null, true);
}
return gbFilter;
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
index 3458fb6..21ddae6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
@@ -538,16 +538,6 @@ public class QBSubQuery implements ISubQueryJoinInfo {
}
if ( sharedAlias != null) {
ASTNode whereClause = SubQueryUtils.subQueryWhere(insertClause);
-
- if ( whereClause != null ) {
- ASTNode u = SubQueryUtils.hasUnQualifiedColumnReferences(whereClause);
- if ( u != null ) {
- subQueryAST.setOrigin(originalSQASTOrigin);
- throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
- u, "SubQuery cannot use the table alias: " + sharedAlias + "; " +
- "this is also an alias in the Outer Query and SubQuery contains a unqualified column reference"));
- }
- }
}
/*
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 1f6dfdd..a861263 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -51,6 +51,7 @@ import org.antlr.runtime.tree.TreeVisitor;
import org.antlr.runtime.tree.TreeVisitorAction;
import org.antlr.runtime.tree.TreeWizard;
import org.antlr.runtime.tree.TreeWizard.ContextVisitor;
+import org.apache.calcite.rel.RelNode;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.mutable.MutableBoolean;
import org.apache.hadoop.fs.FSDataOutputStream;
@@ -11325,6 +11326,17 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
return genExprNodeDesc(expr, input, true, false);
}
+ public ExprNodeDesc genExprNodeDesc(ASTNode expr, RowResolver input,
+ RowResolver outerRR, Map<ASTNode, RelNode> subqueryToRelNode,
+ boolean useCaching) throws SemanticException {
+
+ TypeCheckCtx tcCtx = new TypeCheckCtx(input, useCaching, false);
+ tcCtx.setOuterRR(outerRR);
+ tcCtx.setSubqueryToRelNode(subqueryToRelNode);
+ return genExprNodeDesc(expr, input, tcCtx);
+ }
+
+
public ExprNodeDesc genExprNodeDesc(ASTNode expr, RowResolver input, boolean useCaching)
throws SemanticException {
return genExprNodeDesc(expr, input, useCaching, false);
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java
index 02896ff..9753f9e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java
@@ -18,9 +18,11 @@
package org.apache.hadoop.hive.ql.parse;
+import org.apache.calcite.rel.RelNode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import java.util.Map;
/**
* This class implements the context information that is used for typechecking
@@ -35,6 +37,19 @@ public class TypeCheckCtx implements NodeProcessorCtx {
*/
private RowResolver inputRR;
+ /**
+ * RowResolver of the outer query. This is used to resolve correlated columns in Filter.
+ * TODO:
+ * this currently will only be able to resolve references to the parent query's columns;
+ * it will not work for references to grandparent columns
+ */
+ private RowResolver outerRR;
+
+ /**
+ * Map from the ASTNode of a subquery to its logical plan.
+ */
+ private Map<ASTNode, RelNode> subqueryToRelNode;
+
private final boolean useCaching;
private final boolean foldExpr;
@@ -50,7 +65,7 @@ public class TypeCheckCtx implements NodeProcessorCtx {
private String error;
/**
- * The node that generated the potential typecheck error
+ * The node that generated the potential typecheck error.
*/
private ASTNode errorSrcNode;
@@ -104,6 +119,8 @@ public class TypeCheckCtx implements NodeProcessorCtx {
this.allowWindowing = allowWindowing;
this.allowIndexExpr = allowIndexExpr;
this.allowSubQueryExpr = allowSubQueryExpr;
+ this.outerRR = null;
+ this.subqueryToRelNode = null;
}
/**
@@ -122,6 +139,36 @@ public class TypeCheckCtx implements NodeProcessorCtx {
}
/**
+ * @param outerRR
+ * the outerRR to set
+ */
+ public void setOuterRR(RowResolver outerRR) {
+ this.outerRR = outerRR;
+ }
+
+ /**
+ * @return the outerRR
+ */
+ public RowResolver getOuterRR() {
+ return outerRR;
+ }
+
+ /**
+ * @param subqueryToRelNode
+ * the subqueryToRelNode to set
+ */
+ public void setSubqueryToRelNode(Map<ASTNode, RelNode> subqueryToRelNode) {
+ this.subqueryToRelNode = subqueryToRelNode;
+ }
+
+ /**
+ * @return the subqueryToRelNode
+ */
+ public Map<ASTNode, RelNode> getSubqueryToRelNode() {
+ return subqueryToRelNode;
+ }
+
+ /**
* @param unparseTranslator
* the unparseTranslator to set
*/
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
index ace3eaf..6c30efd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
@@ -30,6 +30,7 @@ import java.util.List;
import java.util.Map;
import java.util.Stack;
+import org.apache.calcite.rel.RelNode;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang3.math.NumberUtils;
import org.apache.hadoop.hive.common.type.HiveChar;
@@ -43,7 +44,6 @@ import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
-import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
@@ -52,6 +52,7 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.lib.ExpressionWalker;
import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcFactory;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc;
@@ -60,6 +61,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeSubQueryDesc;
import org.apache.hadoop.hive.ql.udf.SettableUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare;
@@ -134,7 +136,10 @@ public class TypeCheckProcFactory {
ASTNode expr = (ASTNode) nd;
TypeCheckCtx ctx = (TypeCheckCtx) procCtx;
- if (!ctx.isUseCaching()) {
+ // bypass the lookup only if outerRR is null; otherwise we still need to look for
+ // expressions in outerRR for subqueries, e.g.
+ // select min(b.value) from table b group by b.key
+ // having key in (select .. where a = min(b.value))
+ if (!ctx.isUseCaching() && ctx.getOuterRR() == null) {
return null;
}
@@ -147,6 +152,12 @@ public class TypeCheckProcFactory {
// If the current subExpression is pre-calculated, as in Group-By etc.
ColumnInfo colInfo = input.getExpression(expr);
+
+ // try outer row resolver
+ RowResolver outerRR = ctx.getOuterRR();
+ if(colInfo == null && outerRR != null) {
+ colInfo = outerRR.getExpression(expr);
+ }
if (colInfo != null) {
desc = new ExprNodeColumnDesc(colInfo);
ASTNode source = input.getExpressionSource(expr);
@@ -201,14 +212,14 @@ public class TypeCheckProcFactory {
+ HiveParser.TOK_INTERVAL_SECOND_LITERAL + "%"), tf.getIntervalExprProcessor());
opRules.put(new RuleRegExp("R7", HiveParser.TOK_TABLE_OR_COL + "%"),
tf.getColumnExprProcessor());
- opRules.put(new RuleRegExp("R8", HiveParser.TOK_SUBQUERY_OP + "%"),
+ opRules.put(new RuleRegExp("R8", HiveParser.TOK_SUBQUERY_EXPR + "%"),
tf.getSubQueryExprProcessor());
// The dispatcher fires the processor corresponding to the closest matching
// rule and passes the context along
Dispatcher disp = new DefaultRuleDispatcher(tf.getDefaultExprProcessor(),
opRules, tcCtx);
- GraphWalker ogw = new DefaultGraphWalker(disp);
+ GraphWalker ogw = new ExpressionWalker(disp);
// Create a list of top nodes
ArrayList<Node> topNodes = Lists.<Node>newArrayList(expr);
@@ -618,6 +629,13 @@ public class TypeCheckProcFactory {
boolean isTableAlias = input.hasTableAlias(tableOrCol);
ColumnInfo colInfo = input.get(null, tableOrCol);
+ // try outer row resolver
+ if(ctx.getOuterRR() != null && colInfo == null && !isTableAlias) {
+ RowResolver outerRR = ctx.getOuterRR();
+ isTableAlias = outerRR.hasTableAlias(tableOrCol);
+ colInfo = outerRR.get(null, tableOrCol);
+ }
+
if (isTableAlias) {
if (colInfo != null) {
if (parent != null && parent.getType() == HiveParser.DOT) {
@@ -1158,6 +1176,12 @@ public class TypeCheckProcFactory {
}
ColumnInfo colInfo = input.get(tableAlias, colName);
+ // Try outer Row resolver
+ if(colInfo == null && ctx.getOuterRR() != null) {
+ RowResolver outerRR = ctx.getOuterRR();
+ colInfo = outerRR.get(tableAlias, colName);
+ }
+
if (colInfo == null) {
ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(1)), expr);
return null;
@@ -1222,6 +1246,10 @@ public class TypeCheckProcFactory {
return null;
}
+ if(expr.getType() == HiveParser.TOK_SUBQUERY_OP || expr.getType() == HiveParser.TOK_QUERY) {
+ return null;
+ }
+
if (expr.getType() == HiveParser.TOK_TABNAME) {
return null;
}
@@ -1379,11 +1407,47 @@ public class TypeCheckProcFactory {
return desc;
}
+ //TOK_SUBQUERY_EXPR should have either 2 or 3 children
+ assert(expr.getChildren().size() == 3 || expr.getChildren().size() == 2);
+ //First child should be the subquery operator (TOK_SUBQUERY_OP)
+ assert(expr.getChild(0).getType() == HiveParser.TOK_SUBQUERY_OP);
+
+ ASTNode subqueryOp = (ASTNode) expr.getChild(0);
+
+ boolean isIN = (subqueryOp.getChild(0).getType() == HiveParser.KW_IN
+ || subqueryOp.getChild(0).getType() == HiveParser.TOK_SUBQUERY_OP_NOTIN);
+ boolean isEXISTS = (subqueryOp.getChild(0).getType() == HiveParser.KW_EXISTS
+ || subqueryOp.getChild(0).getType() == HiveParser.TOK_SUBQUERY_OP_NOTEXISTS);
+
+ // subqueryToRelNode might be null if the subquery expression appears anywhere other
+ // than where it is expected, i.e. in a filter (where/having). In that case we should
+ // throw an appropriate error message
+
+ Map<ASTNode, RelNode> subqueryToRelNode = ctx.getSubqueryToRelNode();
+ if(subqueryToRelNode == null) {
+ throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
+ " Currently SubQuery expressions are only allowed as " +
+ "Where and Having Clause predicates"));
+ }
+
+ // For now, because subqueries are only supported in filters,
+ // we create a subquery expression of boolean type
+ if(isEXISTS) {
+ return new ExprNodeSubQueryDesc(TypeInfoFactory.booleanTypeInfo, subqueryToRelNode.get(expr),
+ ExprNodeSubQueryDesc.SubqueryType.EXISTS);
+ }
+ if(isIN) {
+ assert(nodeOutputs[2] != null);
+ ExprNodeDesc lhs = (ExprNodeDesc)nodeOutputs[2];
+ return new ExprNodeSubQueryDesc(TypeInfoFactory.booleanTypeInfo, subqueryToRelNode.get(expr),
+ ExprNodeSubQueryDesc.SubqueryType.IN, lhs);
+ }
+
/*
* Restriction.1.h :: SubQueries only supported in the SQL Where Clause.
*/
ctx.setError(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(sqNode,
- "Currently SubQuery expressions are only allowed as Where Clause predicates"),
+ "Currently only IN & EXISTS SubQuery expressions are allowed"),
sqNode);
return null;
}
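As a note on the control flow the processor above implements: only the IN/NOT IN and EXISTS/NOT EXISTS operator tokens produce an ExprNodeSubQueryDesc; every other operator falls through to the unsupported-expression error. A minimal standalone sketch of that classification, using hypothetical stand-in enums rather than Hive's real HiveParser token constants:

```java
// Simplified model of the TOK_SUBQUERY_EXPR dispatch: given the operator token of
// the subquery expression, decide whether an IN or EXISTS descriptor would be
// built, or whether the expression is rejected as unsupported.
public class SubQueryDispatch {
    // Stand-ins for HiveParser token constants (hypothetical, for illustration only).
    enum Token { KW_IN, TOK_SUBQUERY_OP_NOTIN, KW_EXISTS, TOK_SUBQUERY_OP_NOTEXISTS, KW_OTHER }
    enum SubqueryType { IN, EXISTS }

    /** Returns the descriptor type, or null for unsupported operators. */
    static SubqueryType classify(Token opToken) {
        boolean isIn = opToken == Token.KW_IN || opToken == Token.TOK_SUBQUERY_OP_NOTIN;
        boolean isExists = opToken == Token.KW_EXISTS || opToken == Token.TOK_SUBQUERY_OP_NOTEXISTS;
        if (isExists) {
            return SubqueryType.EXISTS;   // EXISTS needs no LHS operand
        }
        if (isIn) {
            return SubqueryType.IN;       // IN additionally carries the LHS ExprNodeDesc
        }
        return null;                      // anything else falls through to the error path
    }

    public static void main(String[] args) {
        assert classify(Token.KW_IN) == SubqueryType.IN;
        assert classify(Token.TOK_SUBQUERY_OP_NOTEXISTS) == SubqueryType.EXISTS;
        assert classify(Token.KW_OTHER) == null;
        System.out.println("ok");
    }
}
```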
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeSubQueryDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeSubQueryDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeSubQueryDesc.java
new file mode 100644
index 0000000..aec331b
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeSubQueryDesc.java
@@ -0,0 +1,104 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.plan;
+
+import java.io.Serializable;
+
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.calcite.rel.RelNode;
+
+/**
+ * Encapsulates a subquery expression, which consists of:
+ *  the RelNode for the subquery,
+ *  the type (IN, EXISTS), and
+ *  the LHS operand (for IN).
+ */
+public class ExprNodeSubQueryDesc extends ExprNodeDesc implements Serializable {
+ private static final long serialVersionUID = 1L;
+
+ public static enum SubqueryType{
+ IN,
+ EXISTS,
+ };
+
+ public static final int IN=1;
+ public static final int EXISTS=2;
+
+ /**
+ * RelNode corresponding to the subquery.
+ */
+ private RelNode rexSubQuery;
+ private ExprNodeDesc subQueryLhs;
+ private SubqueryType type;
+
+ public ExprNodeSubQueryDesc(TypeInfo typeInfo, RelNode subQuery, SubqueryType type) {
+ super(typeInfo);
+ this.rexSubQuery = subQuery;
+ this.subQueryLhs = null;
+ this.type = type;
+ }
+ public ExprNodeSubQueryDesc(TypeInfo typeInfo, RelNode subQuery,
+ SubqueryType type, ExprNodeDesc lhs) {
+ super(typeInfo);
+ this.rexSubQuery = subQuery;
+ this.subQueryLhs = lhs;
+ this.type = type;
+
+ }
+
+ public SubqueryType getType() {
+ return type;
+ }
+
+ public ExprNodeDesc getSubQueryLhs() {
+ return subQueryLhs;
+ }
+
+ public RelNode getRexSubQuery() {
+ return rexSubQuery;
+ }
+
+ @Override
+ public ExprNodeDesc clone() {
+ return new ExprNodeSubQueryDesc(typeInfo, rexSubQuery, type, subQueryLhs);
+ }
+
+ @Override
+ public boolean isSame(Object o) {
+ if (!(o instanceof ExprNodeSubQueryDesc)) {
+ return false;
+ }
+ ExprNodeSubQueryDesc dest = (ExprNodeSubQueryDesc) o;
+ if (subQueryLhs != null && dest.getSubQueryLhs() != null) {
+ if (!subQueryLhs.equals(dest.getSubQueryLhs())) {
+ return false;
+ }
+ }
+ if (!typeInfo.equals(dest.getTypeInfo())) {
+ return false;
+ }
+ if (!rexSubQuery.equals(dest.getRexSubQuery())) {
+ return false;
+ }
+ if(type != dest.getType()) {
+ return false;
+ }
+ return true;
+ }
+}
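For context, the value-equality contract that `isSame` implements above can be exercised with a simplified standalone mirror of the class (hypothetical names; the real class depends on Hive's `TypeInfo` and Calcite's `RelNode`). One deliberate difference: the original compares the LHS only when both sides are non-null, while this sketch uses the stricter `Objects.equals`:

```java
import java.util.Objects;

// Simplified stand-in for ExprNodeSubQueryDesc: same fields and a similar isSame
// contract, with String placeholders for TypeInfo/RelNode so the sketch is
// self-contained.
public class MiniSubQueryDesc {
    enum SubqueryType { IN, EXISTS }

    final String typeInfo;      // stands in for TypeInfo
    final String relSubQuery;   // stands in for the Calcite RelNode
    final String lhs;           // stands in for the LHS ExprNodeDesc (null for EXISTS)
    final SubqueryType type;

    MiniSubQueryDesc(String typeInfo, String relSubQuery, SubqueryType type, String lhs) {
        this.typeInfo = typeInfo;
        this.relSubQuery = relSubQuery;
        this.type = type;
        this.lhs = lhs;
    }

    /** Mirrors clone(): a field-by-field copy that compares equal to the original. */
    MiniSubQueryDesc copy() {
        return new MiniSubQueryDesc(typeInfo, relSubQuery, type, lhs);
    }

    boolean isSame(Object o) {
        if (!(o instanceof MiniSubQueryDesc)) {
            return false;
        }
        MiniSubQueryDesc dest = (MiniSubQueryDesc) o;
        // Stricter than the original: Objects.equals also distinguishes
        // a null LHS from a non-null one.
        return Objects.equals(lhs, dest.lhs)
            && typeInfo.equals(dest.typeInfo)
            && relSubQuery.equals(dest.relSubQuery)
            && type == dest.type;
    }

    public static void main(String[] args) {
        MiniSubQueryDesc in = new MiniSubQueryDesc("boolean", "rel#1", SubqueryType.IN, "p_size");
        assert in.isSame(in.copy());                 // copy compares equal
        MiniSubQueryDesc exists = new MiniSubQueryDesc("boolean", "rel#1", SubqueryType.EXISTS, null);
        assert !in.isSame(exists);                   // type and LHS participate in equality
        System.out.println("ok");
    }
}
```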
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/queries/clientnegative/subquery_corr_from.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/subquery_corr_from.q b/ql/src/test/queries/clientnegative/subquery_corr_from.q
new file mode 100644
index 0000000..7df52c9
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/subquery_corr_from.q
@@ -0,0 +1,3 @@
+-- correlated vars are only supported in the where/having clause
+
+select * from part po where p_size IN (select p_size from (select p_size, p_type from part pp where pp.p_brand = po.p_brand) p where p.p_type=po.p_type) ;
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/queries/clientnegative/subquery_corr_grandparent.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/subquery_corr_grandparent.q b/ql/src/test/queries/clientnegative/subquery_corr_grandparent.q
new file mode 100644
index 0000000..3519c5b
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/subquery_corr_grandparent.q
@@ -0,0 +1,5 @@
+-- inner query can only refer to its parent query columns
+select *
+from part x
+where x.p_name in (select y.p_name from part y where exists (select z.p_name from part z where x.p_name = z.p_name))
+;
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/queries/clientnegative/subquery_corr_select.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/subquery_corr_select.q b/ql/src/test/queries/clientnegative/subquery_corr_select.q
new file mode 100644
index 0000000..a47cee1
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/subquery_corr_select.q
@@ -0,0 +1,2 @@
+-- correlated var is only allowed in where/having
+explain select * from part po where p_size IN (select po.p_size from part p where p.p_type=po.p_type) ;
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/queries/clientnegative/subquery_restrictions.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/subquery_restrictions.q b/ql/src/test/queries/clientnegative/subquery_restrictions.q
new file mode 100644
index 0000000..80870d7
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/subquery_restrictions.q
@@ -0,0 +1,92 @@
+--Restriction.1.h SubQueries only supported in the SQL Where Clause.
+select src.key in (select key from src s1 where s1.key > '9')
+from src;
+
+select count(*)
+from src
+group by src.key in (select key from src s1 where s1.key > '9') ;
+
+--Restriction.2.h The subquery can only be the RHS of an expression
+----currently the parser doesn't allow such queries
+--select * from part where (select p_size from part) IN (1,2);
+
+--Restriction.3.m The predicate operators supported are In, Not In, exists and Not exists.
+----select * from part where p_brand > (select key from src);
+
+--Check.4.h For Exists and Not Exists, the Sub Query must have 1 or more correlated predicates.
+select * from src where exists (select * from part);
+
+--Check.5.h multiple columns in subquery select
+select * from src where src.key in (select * from src s1 where s1.key > '9');
+
+--Restriction.6.m The LHS in a SubQuery must have all its Column References qualified
+--This is not a restriction anymore
+
+--Restriction 7.h subquery with or condition
+select count(*)
+from src
+where src.key in (select key from src s1 where s1.key > '9') or src.value is not null
+;
+
+--Restriction.8.m We allow only 1 SubQuery expression per Query
+select * from part where p_size IN (select p_size from part) AND p_brand IN (select p_brand from part);
+
+--Restriction 9.m nested subquery
+select *
+from part x
+where x.p_name in (select y.p_name from part y where exists (select z.p_name from part z where y.p_name = z.p_name))
+;
+
+--Restriction.10.h In a SubQuery references to Parent Query columns is only supported in the where clause.
+select * from part where p_size in (select p.p_size + part.p_size from part p);
+select * from part where part.p_size IN (select min(p_size) from part p group by part.p_type);
+
+
+--Restriction.11.m A SubQuery predicate that refers to a Parent Query column must be a valid Join predicate
+select * from part where p_size in (select p_size from part p where p.p_type > part.p_type);
+select * from part where part.p_size IN (select min(p_size) from part p where NOT(part.p_type = p.p_type));
+
+
+--Check.12.h SubQuery predicates cannot only refer to Parent Query columns
+select * from part where p_name IN (select p_name from part p where part.p_type <> 1);
+
+--Restriction.13.m In the case of an implied Group By on a correlated SubQuery, the SubQuery always returns 1 row. E.g. a count on an empty set is 0, while all other UDAFs return null. Converting such a SubQuery into a Join by computing all Groups in one shot changes the semantics: the Group By SubQuery output will not contain rows for Groups that don't exist.
+select *
+from src b
+where exists
+ (select count(*)
+ from src a
+ where b.value = a.value and a.key = b.key and a.value > 'val_9'
+ )
+;
+
+--Restriction.14.h Correlated Sub Queries cannot contain Windowing clauses.
+select p_mfgr, p_name, p_size
+from part a
+where a.p_size in
+ (select first_value(p_size) over(partition by p_mfgr order by p_size)
+ from part b
+ where a.p_brand = b.p_brand)
+;
+
+--Restriction 15.h all unqualified column references in a SubQuery will resolve to table sources within the SubQuery.
+select *
+from src
+where src.key in (select key from src where key > '9')
+;
+
+----------------------------------------------------------------
+-- The following tests do not fall under any restriction per se; they just currently don't work with Hive
+----------------------------------------------------------------
+
+-- correlated var which refers to outer query join table
+explain select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_partkey = p.l_partkey) ;
+
+-- union, not in, corr
+explain select * from part where p_name NOT IN (select p_name from part p where p.p_mfgr = part.p_comment UNION ALL select p_brand from part);
+
+-- union, not in, corr, cor var in both queries
+explain select * from part where p_name NOT IN (select p_name from part p where p.p_mfgr = part.p_comment UNION ALL select p_brand from part pp where pp.p_mfgr = part.p_comment);
+
+-- IN, union, corr
+explain select * from part where p_name IN (select p_name from part p where p.p_mfgr = part.p_comment UNION ALL select p_brand from part);
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/queries/clientpositive/cbo_rp_auto_join1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/cbo_rp_auto_join1.q b/ql/src/test/queries/clientpositive/cbo_rp_auto_join1.q
index cbfb5d5..8936073 100644
--- a/ql/src/test/queries/clientpositive/cbo_rp_auto_join1.q
+++ b/ql/src/test/queries/clientpositive/cbo_rp_auto_join1.q
@@ -1,5 +1,6 @@
set hive.cbo.returnpath.hiveop=true;
set hive.stats.fetch.column.stats=true;
+set hive.enable.semijoin.conversion=true;
;
set hive.exec.reducers.max = 1;
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/queries/clientpositive/join31.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/join31.q b/ql/src/test/queries/clientpositive/join31.q
index c79105f..aa17b4d 100644
--- a/ql/src/test/queries/clientpositive/join31.q
+++ b/ql/src/test/queries/clientpositive/join31.q
@@ -1,4 +1,5 @@
set hive.mapred.mode=nonstrict;
+set hive.enable.semijoin.conversion=true;
-- SORT_QUERY_RESULTS
CREATE TABLE dest_j1(key STRING, cnt INT);
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/queries/clientpositive/multiMapJoin2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/multiMapJoin2.q b/ql/src/test/queries/clientpositive/multiMapJoin2.q
index cf5dbb0..c66dc66 100644
--- a/ql/src/test/queries/clientpositive/multiMapJoin2.q
+++ b/ql/src/test/queries/clientpositive/multiMapJoin2.q
@@ -3,6 +3,7 @@ set hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecutePrinter,org.
set hive.auto.convert.join=true;
set hive.auto.convert.join.noconditionaltask=true;
set hive.auto.convert.join.noconditionaltask.size=6000;
+set hive.enable.semijoin.conversion=true;
-- we will generate one MR job.
EXPLAIN
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/queries/clientpositive/subquery_in.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/subquery_in.q b/ql/src/test/queries/clientpositive/subquery_in.q
index c01ae70..5b22dce 100644
--- a/ql/src/test/queries/clientpositive/subquery_in.q
+++ b/ql/src/test/queries/clientpositive/subquery_in.q
@@ -1,5 +1,6 @@
set hive.mapred.mode=nonstrict;
set hive.explain.user=false;
+
-- SORT_QUERY_RESULTS
-- non agg, non corr
@@ -118,3 +119,104 @@ from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li o
where li.l_linenumber = 1 and
li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
;
+
+
+--where has multiple conjuncts
+explain select * from part where p_brand <> 'Brand#14' AND p_size IN (select min(p_size) from part p where p.p_type = part.p_type group by p_type) AND p_size <> 340;
+select * from part where p_brand <> 'Brand#14' AND p_size IN (select min(p_size) from part p where p.p_type = part.p_type group by p_type) AND p_size <> 340;
+
+--lhs contains non-simple expression
+explain select * from part where (p_size-1) IN (select min(p_size) from part group by p_type);
+select * from part where (p_size-1) IN (select min(p_size) from part group by p_type);
+
+explain select * from part where (p_partkey*p_size) IN (select min(p_partkey) from part group by p_type);
+select * from part where (p_partkey*p_size) IN (select min(p_partkey) from part group by p_type);
+
+--lhs contains non-simple expression, corr
+explain select count(*) as c from part as e where p_size + 100 IN (select p_partkey from part where p_name = e.p_name);
+select count(*) as c from part as e where p_size + 100 IN (select p_partkey from part where p_name = e.p_name);
+
+-- lhs contains udf expression
+explain select * from part where floor(p_retailprice) IN (select floor(min(p_retailprice)) from part group by p_type);
+select * from part where floor(p_retailprice) IN (select floor(min(p_retailprice)) from part group by p_type);
+
+explain select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey );
+select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey );
+
+-- correlated query, multiple correlated variables referring to different outer var
+explain select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey );
+select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey );
+
+-- correlated var refers to outer table alias
+explain select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type IN (select p_type from part where part.p_brand = fpart.brand);
+select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type IN (select p_type from part where part.p_brand = fpart.brand);
+
+-- correlated var refers to outer table alias which is an expression
+explain select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type IN (select p_type from part where (part.p_size+1) = fpart.size);
+select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type IN (select p_type from part where (part.p_size+1) = fpart.size);
+
+-- where plus having
+explain select key, count(*) from src where value IN (select value from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key );
+select key, count(*) from src where value IN (select value from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key );
+
+-- where with having, correlated
+explain select key, count(*) from src where value IN (select value from src sc where sc.key = src.key ) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key );
+select key, count(*) from src where value IN (select value from src sc where sc.key = src.key ) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key );
+
+-- subquery with order by
+explain select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) order by p_brand;
+select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) order by p_brand;
+
+--order by with limit
+explain select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) order by p_brand limit 4;
+select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) order by p_brand limit 4;
+
+-- union, uncorr
+explain select * from src where key IN (select p_name from part UNION ALL select p_brand from part);
+select * from src where key IN (select p_name from part UNION ALL select p_brand from part);
+
+-- corr, subquery has another subquery in from
+explain select p_mfgr, b.p_name, p_size from part b where b.p_name in
+ (select p_name from (select p_mfgr, p_name, p_size as r from part) a where r < 10 and b.p_mfgr = a.p_mfgr ) order by p_mfgr,p_size;
+select p_mfgr, b.p_name, p_size from part b where b.p_name in
+ (select p_name from (select p_mfgr, p_name, p_size as r from part) a where r < 10 and b.p_mfgr = a.p_mfgr ) order by p_mfgr,p_size;
+
+-- join in subquery, correlated predicate with only one table
+explain select p_partkey from part where p_name in (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size);
+select p_partkey from part where p_name in (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size);
+
+-- join in subquery, correlated predicate with both inner tables, same outer var
+explain select p_partkey from part where p_name in
+ (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size and p.p_size=part.p_size);
+select p_partkey from part where p_name in
+ (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size and p.p_size=part.p_size);
+
+-- join in subquery, correlated predicate with both inner tables, different outer var
+explain select p_partkey from part where p_name in
+ (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size and p.p_type=part.p_type);
+
+-- subquery within from
+explain select p_partkey from
+ (select p_size, p_partkey from part where p_name in (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size)) subq;
+select p_partkey from
+ (select p_size, p_partkey from part where p_name in (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size)) subq;
+
+
+create table tempty(i int);
+create table tnull(i int);
+insert into tnull values(NULL) , (NULL);
+
+-- empty inner table, non-null sq key, expected empty result
+select * from part where p_size IN (select i from tempty);
+
+-- empty inner table, null sq key, expected empty result
+select * from tnull where i IN (select i from tempty);
+
+-- null inner table, non-null sq key
+select * from part where p_size IN (select i from tnull);
+
+-- null inner table, null sq key
+select * from tnull where i IN (select i from tnull);
+
+drop table tempty;
+
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/queries/clientpositive/subquery_notin.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/subquery_notin.q b/ql/src/test/queries/clientpositive/subquery_notin.q
index 3f4fb7f..c29e63f 100644
--- a/ql/src/test/queries/clientpositive/subquery_notin.q
+++ b/ql/src/test/queries/clientpositive/subquery_notin.q
@@ -1,4 +1,5 @@
set hive.mapred.mode=nonstrict;
+
-- non agg, non corr
explain
select *
@@ -76,10 +77,10 @@ order by p_mfgr, p_size
;
-- non agg, non corr, Group By in Parent Query
-select li.l_partkey, count(*)
-from lineitem li
-where li.l_linenumber = 1 and
- li.l_orderkey not in (select l_orderkey from lineitem where l_shipmode = 'AIR')
+select li.l_partkey, count(*)
+from lineitem li
+where li.l_linenumber = 1 and
+ li.l_orderkey not in (select l_orderkey from lineitem where l_shipmode = 'AIR')
group by li.l_partkey
;
@@ -103,3 +104,127 @@ from T1_v where T1_v.key not in (select T2_v.key from T2_v);
select *
from T1_v where T1_v.key not in (select T2_v.key from T2_v);
+
+--where has multiple conjuncts
+explain select * from part where p_brand <> 'Brand#14' AND p_size NOT IN (select min(p_size) from part p where p.p_type = part.p_type group by p_type) AND p_size <> 340;
+select * from part where p_brand <> 'Brand#14' AND p_size NOT IN (select min(p_size) from part p where p.p_type = part.p_type group by p_type) AND p_size <> 340;
+
+--lhs contains non-simple expression
+explain select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type);
+select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type);
+
+explain select * from part where (p_partkey*p_size) NOT IN (select min(p_partkey) from part group by p_type);
+select * from part where (p_partkey*p_size) NOT IN (select min(p_partkey) from part group by p_type);
+
+--lhs contains non-simple expression, corr
+explain select count(*) as c from part as e where p_size + 100 NOT IN (select p_partkey from part where p_name = e.p_name);
+select count(*) as c from part as e where p_size + 100 NOT IN (select p_partkey from part where p_name = e.p_name);
+
+-- lhs contains udf expression
+explain select * from part where floor(p_retailprice) NOT IN (select floor(min(p_retailprice)) from part group by p_type);
+select * from part where floor(p_retailprice) NOT IN (select floor(min(p_retailprice)) from part group by p_type);
+
+explain select * from part where p_name NOT IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey );
+select * from part where p_name NOT IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey );
+
+-- correlated query, multiple correlated variables referring to different outer var
+explain select * from part where p_name NOT IN (select p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey );
+select * from part where p_name NOT IN (select p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey );
+
+-- correlated var refers to outer table alias
+explain select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type NOT IN (select p_type from part where part.p_brand = fpart.brand);
+select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type NOT IN (select p_type from part where part.p_brand = fpart.brand);
+
+-- correlated var refers to outer table alias which is an expression
+explain select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type NOT IN (select p_type from part where (part.p_size+1) = fpart.size);
+select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type NOT IN (select p_type from part where (part.p_size+1) = fpart.size);
+
+-- where plus having
+explain select key, count(*) from src where value NOT IN (select value from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key );
+select key, count(*) from src where value NOT IN (select value from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key );
+
+-- where with having, correlated
+explain select key, count(*) from src where value NOT IN (select value from src sc where sc.key = src.key ) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key );
+select key, count(*) from src where value NOT IN (select value from src sc where sc.key = src.key ) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key );
+
+-- subquery with order by
+explain select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) order by p_brand;
+select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) order by p_brand;
+
+--order by with limit
+explain select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) order by p_brand limit 4;
+select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) order by p_brand limit 4;
+
+-- union, uncorr
+explain select * from src where key NOT IN (select p_name from part UNION ALL select p_brand from part);
+select * from src where key NOT IN (select p_name from part UNION ALL select p_brand from part);
+
+explain select count(*) as c from part as e where p_size + 100 not in ( select p_type from part where p_brand = e.p_brand);
+select count(*) as c from part as e where p_size + 100 not in ( select p_type from part where p_brand = e.p_brand);
+
+--nullability tests
+CREATE TABLE t1 (c1 INT, c2 CHAR(100));
+INSERT INTO t1 VALUES (null,null), (1,''), (2,'abcde'), (100,'abcdefghij');
+
+CREATE TABLE t2 (c1 INT);
+INSERT INTO t2 VALUES (null), (2), (100);
+
+-- uncorr
+explain SELECT c1 FROM t1 WHERE c1 NOT IN (SELECT c1 FROM t2);
+SELECT c1 FROM t1 WHERE c1 NOT IN (SELECT c1 FROM t2);
+
+-- corr
+explain SELECT c1 FROM t1 WHERE c1 NOT IN (SELECT c1 FROM t2 where t1.c2=t2.c1);
+SELECT c1 FROM t1 WHERE c1 NOT IN (SELECT c1 FROM t2 where t1.c1=t2.c1);
+
+DROP TABLE t1;
+DROP TABLE t2;
+
+-- corr, nullability, should not produce any result
+create table t1(a int, b int);
+insert into t1 values(1,0), (1,0),(1,0);
+
+create table t2(a int, b int);
+insert into t2 values(2,1), (3,1), (NULL,1);
+
+explain select t1.a from t1 where t1.b NOT IN (select t2.a from t2 where t2.b=t1.a);
+select t1.a from t1 where t1.b NOT IN (select t2.a from t2 where t2.b=t1.a);
+drop table t1;
+drop table t2;
+
+
+-- corr, nullability, should produce result
+create table t7(i int, j int);
+insert into t7 values(null, 5), (4, 15);
+
+create table fixOb(i int, j int);
+insert into fixOb values(-1, 5), (-1, 15);
+
+explain select * from fixOb where j NOT IN (select i from t7 where t7.j=fixOb.j);
+select * from fixOb where j NOT IN (select i from t7 where t7.j=fixOb.j);
+
+drop table t7;
+drop table fixOb;
+
+create table t(i int, j int);
+insert into t values(1,2), (4,5), (7, NULL);
+
+
+-- case with empty inner result (t1.j=t.j=NULL) and null subquery key(t.j = NULL)
+explain select t.i from t where t.j NOT IN (select t1.i from t t1 where t1.j=t.j);
+select t.i from t where t.j NOT IN (select t1.i from t t1 where t1.j=t.j);
+
+-- case with empty inner result (t1.j=t.j=NULL) and non-null subquery key(t.i is never null)
+explain select t.i from t where t.i NOT IN (select t1.i from t t1 where t1.j=t.j);
+select t.i from t where t.i NOT IN (select t1.i from t t1 where t1.j=t.j);
+
+-- case with non-empty inner result and null subquery key(t.j is null)
+explain select t.i from t where t.j NOT IN (select t1.i from t t1 );
+select t.i from t where t.j NOT IN (select t1.i from t t1 );
+
+-- case with non-empty inner result and non-null subquery key(t.i is never null)
+explain select t.i from t where t.i NOT IN (select t1.i from t t1 );
+select t.i from t where t.i NOT IN (select t1.i from t t1 );
+
+drop table t;
+
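
The nullability cases above hinge on SQL's three-valued logic for `NOT IN`. If the subquery result contains a NULL, `x NOT IN (subquery)` can never evaluate to TRUE: `x <> NULL` is UNKNOWN, the conjunction of the comparisons is therefore UNKNOWN, and the row is filtered out. A minimal standalone illustration (not part of the committed test file; assumes FROM-less SELECT support):

```sql
-- 1 NOT IN (2, NULL): (1<>2) AND (1<>NULL) = TRUE AND UNKNOWN = UNKNOWN -> row filtered
SELECT 1 WHERE 1 NOT IN (2, NULL);
-- 1 NOT IN (2, 3): (1<>2) AND (1<>3) = TRUE AND TRUE = TRUE -> row kept
SELECT 1 WHERE 1 NOT IN (2, 3);
```

This is why the de-correlated plans below add extra group-by/join branches over the plain semi-join: the rewrite must distinguish an empty subquery result from one that merely contains NULLs.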
[07/21] hive git commit: HIVE-15192 : Use Calcite to de-correlate and plan subqueries (Vineet Garg via Ashutosh Chauhan)
Posted by ha...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/results/clientpositive/spark/subquery_in.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/subquery_in.q.out b/ql/src/test/results/clientpositive/spark/subquery_in.q.out
index 21a48ec..36e3e6e 100644
--- a/ql/src/test/results/clientpositive/spark/subquery_in.q.out
+++ b/ql/src/test/results/clientpositive/spark/subquery_in.q.out
@@ -22,7 +22,8 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2)
+ Reducer 4 <- Map 3 (GROUP, 2)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -30,19 +31,16 @@ STAGE PLANS:
TableScan
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (key > '9') (type: boolean)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Map 3
Map Operator Tree:
TableScan
@@ -51,37 +49,45 @@ STAGE PLANS:
Filter Operator
predicate: (key > '9') (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string)
+ Group By Operator
+ keys: key (type: string)
+ mode: hash
outputColumnNames: _col0
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
Stage: Stage-0
Fetch Operator
@@ -140,7 +146,10 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2)
+ Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2)
+ Reducer 5 <- Reducer 4 (GROUP, 2)
+ Reducer 7 <- Map 6 (GROUP, 2)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -148,57 +157,113 @@ STAGE PLANS:
TableScan
alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((key > '9') and value is not null) (type: boolean)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Map 3
Map Operator Tree:
TableScan
alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key > '9') and value is not null) (type: boolean)
+ predicate: (key > '9') (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string), _col1 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string)
+ outputColumnNames: value
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: value (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col0 (type: string), _col1 (type: string)
1 _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col2 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+ Reducer 7
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Stage: Stage-0
Fetch Operator
@@ -265,9 +330,10 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2)
Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2)
Reducer 5 <- Reducer 4 (GROUP, 1)
+ Reducer 6 <- Reducer 5 (GROUP, 2)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -275,19 +341,16 @@ STAGE PLANS:
TableScan
alias: part
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: UDFToDouble(p_size) is not null (type: boolean)
+ Select Operator
+ expressions: p_name (type: string), p_size (type: int)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: p_name (type: string), p_size (type: int), UDFToDouble(p_size) (type: double)
- outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ key expressions: UDFToDouble(_col1) (type: double)
+ sort order: +
+ Map-reduce partition columns: UDFToDouble(_col1) (type: double)
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col2 (type: double)
- sort order: +
- Map-reduce partition columns: _col2 (type: double)
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col1 (type: int)
+ value expressions: _col0 (type: string), _col1 (type: int)
Map 3
Map Operator Tree:
TableScan
@@ -303,9 +366,9 @@ STAGE PLANS:
Reduce Operator Tree:
Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
- 0 _col2 (type: double)
+ 0 UDFToDouble(_col1) (type: double)
1 _col0 (type: double)
outputColumnNames: _col0, _col1
Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
@@ -366,19 +429,28 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: _col0 is not null (type: boolean)
+ Group By Operator
+ keys: _col0 (type: double)
+ mode: hash
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: double)
- mode: hash
- outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: double)
Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: double)
- sort order: +
- Map-reduce partition columns: _col0 (type: double)
- Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ Reducer 6
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: double)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: double)
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
Stage: Stage-0
Fetch Operator
@@ -434,9 +506,12 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2)
Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2)
- Reducer 5 <- Reducer 4 (GROUP, 2)
+ Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2)
+ Reducer 6 <- Reducer 5 (GROUP, 2)
+ Reducer 7 <- Reducer 6 (GROUP, 2)
+ Reducer 9 <- Map 8 (GROUP, 2)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -444,41 +519,54 @@ STAGE PLANS:
TableScan
alias: b
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (p_mfgr is not null and p_size is not null) (type: boolean)
+ Select Operator
+ expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int)
- outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ key expressions: _col1 (type: string), _col2 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: string), _col2 (type: int)
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: string), _col2 (type: int)
- sort order: ++
- Map-reduce partition columns: _col1 (type: string), _col2 (type: int)
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string)
+ value expressions: _col0 (type: string)
Map 3
Map Operator Tree:
TableScan
alias: part
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: p_mfgr is not null (type: boolean)
+ Reduce Output Operator
+ key expressions: p_mfgr (type: string), p_size (type: int)
+ sort order: ++
+ Map-reduce partition columns: p_mfgr (type: string)
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: p_mfgr (type: string), p_size (type: int)
- sort order: ++
- Map-reduce partition columns: p_mfgr (type: string)
+ TopN Hash Memory Usage: 0.1
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_mfgr (type: string)
+ outputColumnNames: p_mfgr
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: p_mfgr (type: string)
+ mode: hash
+ outputColumnNames: _col0
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- TopN Hash Memory Usage: 0.1
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col1 (type: string), _col2 (type: int)
- 1 _col0 (type: string), _col1 (type: int)
+ 1 _col1 (type: string), _col0 (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
Select Operator
@@ -526,39 +614,84 @@ STAGE PLANS:
expressions: _col2 (type: string), _col5 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: min(_col1)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: int)
+ value expressions: _col1 (type: int)
Reducer 5
Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: string), _col1 (type: int)
+ outputColumnNames: _col2, _col1
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(_col1)
+ keys: _col2 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ Reducer 6
+ Reduce Operator Tree:
Group By Operator
aggregations: min(VALUE._col0)
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: _col1 is not null (type: boolean)
- Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string), _col1 (type: int)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: int)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
- Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+ Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE
+ Reducer 7
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string), _col0 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: string), _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE
+ Reducer 9
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
Stage: Stage-0
Fetch Operator
@@ -618,8 +751,10 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2)
- Reducer 4 <- Map 3 (GROUP, 2)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2)
+ Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2)
+ Reducer 5 <- Reducer 4 (GROUP PARTITION-LEVEL SORT, 2)
+ Reducer 7 <- Map 6 (GROUP, 2)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -627,70 +762,118 @@ STAGE PLANS:
TableScan
alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key > '9') and value is not null) (type: boolean)
+ predicate: (key > '9') (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Map 3
+ value expressions: _col0 (type: string)
+ Map 6
Map Operator Tree:
TableScan
- alias: a
+ alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((key > '9') and value is not null) (type: boolean)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string)
+ outputColumnNames: value
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- keys: key (type: string), value (type: string)
+ keys: value (type: string)
mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col0 (type: string), _col1 (type: string)
1 _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Reducer 4
Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: string), KEY._col1 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- keys: _col0 (type: string), _col1 (type: string)
+ keys: _col0 (type: string), _col2 (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string)
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE
+ Reducer 7
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Stage: Stage-0
Fetch Operator
@@ -773,43 +956,62 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2)
- Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2)
- Reducer 6 <- Map 5 (GROUP, 2)
+ Reducer 2 <- Map 1 (GROUP, 2)
+ Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2)
+ Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2)
+ Reducer 7 <- Map 6 (GROUP, 2)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
+ alias: lineitem
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: l_partkey is not null (type: boolean)
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: l_partkey (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+ Map 5
+ Map Operator Tree:
+ TableScan
alias: li
Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((l_linenumber = 1) and l_partkey is not null and l_orderkey is not null) (type: boolean)
+ predicate: ((l_linenumber = 1) and l_partkey is not null) (type: boolean)
Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: int)
+ key expressions: _col1 (type: int)
sort order: +
- Map-reduce partition columns: _col0 (type: int)
+ Map-reduce partition columns: _col1 (type: int)
Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: int), _col2 (type: int)
- Map 4
+ value expressions: _col0 (type: int), _col2 (type: int)
+ Map 6
Map Operator Tree:
TableScan
alias: lineitem
Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean)
+ predicate: (l_shipmode = 'AIR') (type: boolean)
Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: l_orderkey (type: int)
- outputColumnNames: _col0
+ outputColumnNames: l_orderkey
Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- keys: _col0 (type: int)
+ keys: l_orderkey (type: int)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
@@ -818,41 +1020,35 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
- Map 5
- Map Operator Tree:
- TableScan
- alias: lineitem
- Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: l_partkey is not null (type: boolean)
- Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: l_partkey (type: int)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+ Reducer 3
+ Reduce Operator Tree:
Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col1, _col2
+ 1 _col1 (type: int)
+ outputColumnNames: _col0, _col1, _col3
Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col1 (type: int)
sort order: +
Map-reduce partition columns: _col1 (type: int)
Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: int)
- Reducer 3
+ value expressions: _col0 (type: int), _col3 (type: int)
+ Reducer 4
Reduce Operator Tree:
Join Operator
condition map:
@@ -860,10 +1056,10 @@ STAGE PLANS:
keys:
0 _col1 (type: int)
1 _col0 (type: int)
- outputColumnNames: _col2, _col4
+ outputColumnNames: _col0, _col3
Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col4 (type: int), _col2 (type: int)
+ expressions: _col0 (type: int), _col3 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -873,18 +1069,18 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 6
+ Reducer 7
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
Stage: Stage-0
Fetch Operator
@@ -934,3 +1130,3506 @@ POSTHOOK: Input: default@lineitem
#### A masked pattern was here ####
108570 8571
4297 1798
+PREHOOK: query: --where has multiple conjunction
+explain select * from part where p_brand <> 'Brand#14' AND p_size IN (select min(p_size) from part p where p.p_type = part.p_type group by p_type) AND p_size <> 340
+PREHOOK: type: QUERY
+POSTHOOK: query: --where has multiple conjunction
+explain select * from part where p_brand <> 'Brand#14' AND p_size IN (select min(p_size) from part p where p.p_type = part.p_type group by p_type) AND p_size <> 340
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2)
+ Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 8 (PARTITION-LEVEL SORT, 2)
+ Reducer 5 <- Reducer 4 (GROUP, 2)
+ Reducer 6 <- Reducer 5 (GROUP, 2)
+ Reducer 8 <- Map 7 (GROUP, 2)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((p_brand <> 'Brand#14') and (p_size <> 340)) (type: boolean)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col4 (type: string), _col5 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col4 (type: string), _col5 (type: int)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: p
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_type (type: string), p_size (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_type (type: string)
+ outputColumnNames: p_type
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: p_type (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col4 (type: string), _col5 (type: int)
+ 1 _col1 (type: string), _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(_col1)
+ keys: _col2 (type: string), _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int)
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: int)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: string), _col2 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+ Reducer 6
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string), _col0 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: string), _col0 (type: int)
+ Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE
+ Reducer 8
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from part where p_brand <> 'Brand#14' AND p_size IN (select min(p_size) from part p where p.p_type = part.p_type group by p_type) AND p_size <> 340
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select * from part where p_brand <> 'Brand#14' AND p_size IN (select min(p_size) from part p where p.p_type = part.p_type group by p_type) AND p_size <> 340
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ
+110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously
+112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car
+132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even
+144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about
+146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref
+15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu
+155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra
+17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the
+17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve
+191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle
+195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de
+33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful
+40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s
+42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl
+45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful
+48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i
+49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick
+65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr
+78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith
+85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull
+86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully
+90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl
+PREHOOK: query: --lhs contains non-simple expression
+explain select * from part where (p_size-1) IN (select min(p_size) from part group by p_type)
+PREHOOK: type: QUERY
+POSTHOOK: query: --lhs contains non-simple expression
+explain select * from part where (p_size-1) IN (select min(p_size) from part group by p_type)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2)
+ Reducer 4 <- Map 3 (GROUP, 2)
+ Reducer 5 <- Reducer 4 (GROUP, 2)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: (_col5 - 1) (type: int)
+ sort order: +
+ Map-reduce partition columns: (_col5 - 1) (type: int)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_type (type: string), p_size (type: int)
+ outputColumnNames: p_type, p_size
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(p_size)
+ keys: p_type (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 (_col5 - 1) (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int)
+ outputColumnNames: _col1
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from part where (p_size-1) IN (select min(p_size) from part group by p_type)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select * from part where (p_size-1) IN (select min(p_size) from part group by p_type)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car
+121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h
+121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h
+146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref
+15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu
+155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra
+17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve
+191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle
+195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de
+86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully
+PREHOOK: query: explain select * from part where (p_partkey*p_size) IN (select min(p_partkey) from part group by p_type)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from part where (p_partkey*p_size) IN (select min(p_partkey) from part group by p_type)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2)
+ Reducer 4 <- Map 3 (GROUP, 2)
+ Reducer 5 <- Reducer 4 (GROUP, 2)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: (_col0 * _col5) (type: int)
+ sort order: +
+ Map-reduce partition columns: (_col0 * _col5) (type: int)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_type (type: string), p_partkey (type: int)
+ outputColumnNames: p_type, p_partkey
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(p_partkey)
+ keys: p_type (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 (_col0 * _col5) (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int)
+ outputColumnNames: _col1
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from part where (p_partkey*p_size) IN (select min(p_partkey) from part group by p_type)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select * from part where (p_partkey*p_size) IN (select min(p_partkey) from part group by p_type)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s
+PREHOOK: query: --lhs contains non-simple expression, corr
+explain select count(*) as c from part as e where p_size + 100 IN (select p_partkey from part where p_name = e.p_name)
+PREHOOK: type: QUERY
+POSTHOOK: query: --lhs contains non-simple expression, corr
+explain select count(*) as c from part as e where p_size + 100 IN (select p_partkey from part where p_name = e.p_name)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2)
+ Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Reducer 8 (PARTITION-LEVEL SORT, 2)
+ Reducer 6 <- Reducer 5 (GROUP, 2)
+ Reducer 8 <- Map 7 (GROUP, 2)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: e
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_name (type: string), p_size (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), (_col1 + 100) (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), (_col1 + 100) (type: int)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_partkey (type: int), p_name (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: e
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_name (type: string)
+ outputColumnNames: p_name
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: p_name (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string), (_col1 + 100) (type: int)
+ 1 _col1 (type: string), _col0 (type: int)
+ Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: string), _col0 (type: int)
+ outputColumnNames: _col2, _col0
+ Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col2 (type: string), _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+ Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
+ Reducer 6
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string), _col0 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: string), _col0 (type: int)
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+ Reducer 8
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) as c from part as e where p_size + 100 IN (select p_partkey from part where p_name = e.p_name)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) as c from part as e where p_size + 100 IN (select p_partkey from part where p_name = e.p_name)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+0
+PREHOOK: query: -- lhs contains udf expression
+explain select * from part where floor(p_retailprice) IN (select floor(min(p_retailprice)) from part group by p_type)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- lhs contains udf expression
+explain select * from part where floor(p_retailprice) IN (select floor(min(p_retailprice)) from part group by p_type)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2)
+ Reducer 4 <- Map 3 (GROUP, 2)
+ Reducer 5 <- Reducer 4 (GROUP, 2)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: floor(_col7) (type: bigint)
+ sort order: +
+ Map-reduce partition columns: floor(_col7) (type: bigint)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_type (type: string), p_retailprice (type: double)
+ outputColumnNames: p_type, p_retailprice
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(p_retailprice)
+ keys: p_type (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: double)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 floor(_col7) (type: bigint)
+ 1 _col0 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: floor(_col1) (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: bigint)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _c
<TRUNCATED>
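
For reference, the semantics of the (truncated) Spark plan above can be sketched outside Hive. The plan decorrelates `floor(p_retailprice) IN (select floor(min(p_retailprice)) from part group by p_type)` into: a per-`p_type` `min` aggregate (Map 3 / Reducer 4), a `floor` + deduplicating group-by over those mins (Reducer 4 / Reducer 5), and an inner join on `floor(p_retailprice)` against that distinct key set (Reducer 2). A minimal Python sketch of that dataflow, using made-up sample rows (this is illustrative only, not Hive code):

```python
# Sketch of the plan's semantics: keep rows of `part` whose
# floor(p_retailprice) appears in {floor(min(p_retailprice)) per p_type}.
# The dedup in Reducer 4/5 is what makes the inner join behave as a
# semi-join (no row duplication from repeated keys).
from math import floor

# Hypothetical sample rows: (p_type, p_retailprice)
part = [
    ("STANDARD", 1173.15),
    ("STANDARD", 1201.60),
    ("ECONOMY",  1018.10),
    ("ECONOMY",  1173.99),
]

# Map 3 / Reducer 4: min(p_retailprice) grouped by p_type
mins = {}
for p_type, price in part:
    mins[p_type] = min(price, mins.get(p_type, float("inf")))

# Reducer 4/5: floor each group min, then deduplicate into a key set
keys = {floor(v) for v in mins.values()}

# Reducer 2: inner join on floor(p_retailprice) against the key set
result = [row for row in part if floor(row[1]) in keys]
```

With these sample rows the group mins are 1173.15 (STANDARD) and 1018.10 (ECONOMY), so the key set is {1173, 1018} and three of the four rows survive, including a STANDARD row that matches ECONOMY's floored min, which is exactly why the join key is the floored price rather than `p_type`.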