You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2018/07/18 18:52:30 UTC

[34/48] hive git commit: HIVE-19940: Push predicates with deterministic UDFs with RBO (Janaki Lahorani, reviewed by Vineet Garg, Naveen Gangam)

HIVE-19940: Push predicates with deterministic UDFs with RBO (Janaki Lahorani, reviewed by Vineet Garg, Naveen Gangam)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/34adf31a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/34adf31a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/34adf31a

Branch: refs/heads/master-txnstats
Commit: 34adf31af4189c42c9944c7e41820dcdb11bd01a
Parents: 0966a38
Author: Naveen Gangam <ng...@apache.org>
Authored: Mon Jul 16 12:23:08 2018 -0400
Committer: Naveen Gangam <ng...@apache.org>
Committed: Mon Jul 16 12:23:08 2018 -0400

----------------------------------------------------------------------
 .../hive/ql/ppd/ExprWalkerProcFactory.java      |  30 +-
 .../clientpositive/ppd_deterministic_expr.q     | 143 +++++
 .../test/queries/clientpositive/ppd_udf_col.q   |  48 ++
 .../clientpositive/llap/check_constraint.q.out  |  17 +-
 .../llap/enforce_constraint_notnull.q.out       |  17 +-
 .../results/clientpositive/llap/lineage3.q.out  |   2 +-
 .../clientpositive/llap/subquery_in.q.out       |  22 +-
 .../clientpositive/llap/subquery_notin.q.out    |  68 ++-
 .../clientpositive/masking_disablecbo_2.q.out   | 219 ++++----
 .../clientpositive/perf/tez/query8.q.out        | 116 ++--
 .../clientpositive/ppd_deterministic_expr.q.out | 553 +++++++++++++++++++
 .../results/clientpositive/ppd_udf_col.q.out    | 409 ++++++++++++++
 .../results/clientpositive/union_offcbo.q.out   |  34 +-
 13 files changed, 1425 insertions(+), 253 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/34adf31a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
index b01a9ba..1c662d7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
@@ -104,7 +104,11 @@ public final class ExprWalkerProcFactory {
           return false;
         } else {
           if (exp instanceof ExprNodeGenericFuncDesc) {
-            isCandidate = false;
+            if (isDeterministic((ExprNodeGenericFuncDesc) exp)) {
+              isCandidate = true;
+            } else {
+              isCandidate = false;
+            }
           }
           if (exp instanceof ExprNodeColumnDesc && ci == null) {
             ExprNodeColumnDesc column = (ExprNodeColumnDesc)exp;
@@ -136,6 +140,30 @@ public final class ExprWalkerProcFactory {
   }
 
   /**
+   * Recursively checks that a function and its nested function arguments are deterministic.
+   * @param funcDesc function descriptor
+   * @return true if the function and its nested functions are deterministic, false otherwise
+   */
+  public static boolean isDeterministic(ExprNodeGenericFuncDesc funcDesc) {
+    if (FunctionRegistry.isConsistentWithinQuery(funcDesc.getGenericUDF())) {
+      // check whether the children are deterministic (only function-typed children are inspected)
+      for (ExprNodeDesc exprNodeDesc : funcDesc.getChildren()) {
+        if (exprNodeDesc instanceof ExprNodeGenericFuncDesc) {
+          if (!isDeterministic((ExprNodeGenericFuncDesc) exprNodeDesc)) {
+            // some child is not deterministic - return false
+            return false;
+          }
+        }
+      }
+      // all children are deterministic - return true
+      return true;
+    }
+
+    // function is not deterministic - return false
+    return false;
+  }
+
+  /**
    * FieldExprProcessor.
    *
    */

http://git-wip-us.apache.org/repos/asf/hive/blob/34adf31a/ql/src/test/queries/clientpositive/ppd_deterministic_expr.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/ppd_deterministic_expr.q b/ql/src/test/queries/clientpositive/ppd_deterministic_expr.q
new file mode 100644
index 0000000..47c8849
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/ppd_deterministic_expr.q
@@ -0,0 +1,143 @@
+set hive.auto.convert.join=false;
+set hive.optimize.index.filter=true;
+set hive.cbo.enable=false;
+
+CREATE TABLE `testb`(
+   `cola` string COMMENT '',
+   `colb` string COMMENT '',
+   `colc` string COMMENT '')
+PARTITIONED BY (
+   `part1` string,
+   `part2` string,
+   `part3` string)
+
+STORED AS AVRO;
+
+CREATE TABLE `testa`(
+   `col1` string COMMENT '',
+   `col2` string COMMENT '',
+   `col3` string COMMENT '',
+   `col4` string COMMENT '',
+   `col5` string COMMENT '')
+PARTITIONED BY (
+   `part1` string,
+   `part2` string,
+   `part3` string)
+STORED AS AVRO;
+
+insert into testA partition (part1='US', part2='ABC', part3='123')
+values ('12.34', '100', '200', '300', 'abc'),
+('12.341', '1001', '2001', '3001', 'abcd');
+
+insert into testA partition (part1='UK', part2='DEF', part3='123')
+values ('12.34', '100', '200', '300', 'abc'),
+('12.341', '1001', '2001', '3001', 'abcd');
+
+insert into testA partition (part1='US', part2='DEF', part3='200')
+values ('12.34', '100', '200', '300', 'abc'),
+('12.341', '1001', '2001', '3001', 'abcd');
+
+insert into testA partition (part1='CA', part2='ABC', part3='300')
+values ('12.34', '100', '200', '300', 'abc'),
+('12.341', '1001', '2001', '3001', 'abcd');
+
+insert into testB partition (part1='CA', part2='ABC', part3='300')
+values ('600', '700', 'abc'), ('601', '701', 'abcd');
+
+insert into testB partition (part1='CA', part2='ABC', part3='400')
+values ( '600', '700', 'abc'), ( '601', '701', 'abcd');
+
+insert into testB partition (part1='UK', part2='PQR', part3='500')
+values ('600', '700', 'abc'), ('601', '701', 'abcd');
+
+insert into testB partition (part1='US', part2='DEF', part3='200')
+values ( '600', '700', 'abc'), ('601', '701', 'abcd');
+
+insert into testB partition (part1='US', part2='PQR', part3='123')
+values ( '600', '700', 'abc'), ('601', '701', 'abcd');
+
+-- views with deterministic functions
+create view viewDeterministicUDFA partitioned on (vpart1, vpart2, vpart3) as select
+ cast(col1 as decimal(38,18)) as vcol1,
+ cast(col2 as decimal(38,18)) as vcol2,
+ cast(col3 as decimal(38,18)) as vcol3,
+ cast(col4 as decimal(38,18)) as vcol4,
+ cast(col5 as char(10)) as vcol5,
+ cast(part1 as char(2)) as vpart1,
+ cast(part2 as char(3)) as vpart2,
+ cast(part3 as char(3)) as vpart3
+ from testa
+where part1 in ('US', 'CA');
+
+create view viewDeterministicUDFB partitioned on (vpart1, vpart2, vpart3) as select
+ cast(cola as decimal(38,18)) as vcolA,
+ cast(colb as decimal(38,18)) as vcolB,
+ cast(colc as char(10)) as vcolC,
+ cast(part1 as char(2)) as vpart1,
+ cast(part2 as char(3)) as vpart2,
+ cast(part3 as char(3)) as vpart3
+ from testb
+where part1 in ('US', 'CA');
+
+-- views whose partition columns are exposed without any function applied
+create view viewNoUDFA partitioned on (part1, part2, part3) as select
+ cast(col1 as decimal(38,18)) as vcol1,
+ cast(col2 as decimal(38,18)) as vcol2,
+ cast(col3 as decimal(38,18)) as vcol3,
+ cast(col4 as decimal(38,18)) as vcol4,
+ cast(col5 as char(10)) as vcol5,
+ part1,
+ part2,
+ part3
+ from testa
+where part1 in ('US', 'CA');
+
+create view viewNoUDFB partitioned on (part1, part2, part3) as select
+ cast(cola as decimal(38,18)) as vcolA,
+ cast(colb as decimal(38,18)) as vcolB,
+ cast(colc as char(10)) as vcolC,
+ part1,
+ part2,
+ part3
+ from testb
+where part1 in ('US', 'CA');
+
+-- query referencing deterministic functions
+explain
+select vcol1, vcol2, vcol3, vcola, vcolb
+from viewDeterministicUDFA a inner join viewDeterministicUDFB b
+on a.vpart1 = b.vpart1
+and a.vpart2 = b.vpart2
+and a.vpart3 = b.vpart3
+and a.vpart1 = 'US'
+and a.vpart2 = 'DEF'
+and a.vpart3 = '200';
+
+select vcol1, vcol2, vcol3, vcola, vcolb
+from viewDeterministicUDFA a inner join viewDeterministicUDFB b
+on a.vpart1 = b.vpart1
+and a.vpart2 = b.vpart2
+and a.vpart3 = b.vpart3
+and a.vpart1 = 'US'
+and a.vpart2 = 'DEF'
+and a.vpart3 = '200';
+
+-- query with views referencing no udfs
+explain
+select vcol1, vcol2, vcol3, vcola, vcolb
+from viewNoUDFA a inner join viewNoUDFB b
+on a.part1 = b.part1
+and a.part2 = b.part2
+and a.part3 = b.part3
+and a.part1 = 'US'
+and a.part2 = 'DEF'
+and a.part3 = '200';
+
+select vcol1, vcol2, vcol3, vcola, vcolb
+from viewNoUDFA a inner join viewNoUDFB b
+on a.part1 = b.part1
+and a.part2 = b.part2
+and a.part3 = b.part3
+and a.part1 = 'US'
+and a.part2 = 'DEF'
+and a.part3 = '200';

http://git-wip-us.apache.org/repos/asf/hive/blob/34adf31a/ql/src/test/queries/clientpositive/ppd_udf_col.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/ppd_udf_col.q b/ql/src/test/queries/clientpositive/ppd_udf_col.q
index 9c7d4fd..ac2f861 100644
--- a/ql/src/test/queries/clientpositive/ppd_udf_col.q
+++ b/ql/src/test/queries/clientpositive/ppd_udf_col.q
@@ -48,3 +48,51 @@ EXPLAIN
 SELECT key,randum123, v10
 FROM (SELECT *, cast(rand() as double) AS randum123, value*10 AS v10 FROM src WHERE key = 100) a
 WHERE a.v10 <= 200;
+
+set hive.cbo.enable=false;
+
+EXPLAIN
+SELECT key, randum123
+FROM (SELECT *, cast(rand() as double) AS randum123 FROM src WHERE key = 100) a
+WHERE randum123 <=0.1;
+
+EXPLAIN
+SELECT * FROM
+(
+SELECT key, randum123
+FROM (SELECT *, cast(rand() as double) AS randum123 FROM src WHERE key = 100) a
+WHERE randum123 <=0.1)s WHERE s.randum123>0.1 LIMIT 20;
+
+EXPLAIN
+SELECT key,randum123, h4
+FROM (SELECT *, cast(rand() as double) AS randum123, hex(4) AS h4 FROM src WHERE key = 100) a
+WHERE a.h4 <= 3;
+
+EXPLAIN
+SELECT key,randum123, v10
+FROM (SELECT *, cast(rand() as double) AS randum123, value*10 AS v10 FROM src WHERE key = 100) a
+WHERE a.v10 <= 200;
+
+set hive.ppd.remove.duplicatefilters=false;
+
+EXPLAIN
+SELECT key, randum123
+FROM (SELECT *, cast(rand() as double) AS randum123 FROM src WHERE key = 100) a
+WHERE randum123 <=0.1;
+
+EXPLAIN
+SELECT * FROM
+(
+SELECT key, randum123
+FROM (SELECT *, cast(rand() as double) AS randum123 FROM src WHERE key = 100) a
+WHERE randum123 <=0.1)s WHERE s.randum123>0.1 LIMIT 20;
+
+EXPLAIN
+SELECT key,randum123, h4
+FROM (SELECT *, cast(rand() as double) AS randum123, hex(4) AS h4 FROM src WHERE key = 100) a
+WHERE a.h4 <= 3;
+
+EXPLAIN
+SELECT key,randum123, v10
+FROM (SELECT *, cast(rand() as double) AS randum123, value*10 AS v10 FROM src WHERE key = 100) a
+WHERE a.v10 <= 200;

http://git-wip-us.apache.org/repos/asf/hive/blob/34adf31a/ql/src/test/results/clientpositive/llap/check_constraint.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/check_constraint.q.out b/ql/src/test/results/clientpositive/llap/check_constraint.q.out
index 411b4a6..e4cd97e 100644
--- a/ql/src/test/results/clientpositive/llap/check_constraint.q.out
+++ b/ql/src/test/results/clientpositive/llap/check_constraint.q.out
@@ -1820,20 +1820,17 @@ STAGE PLANS:
                   alias: src
                   Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
-                    predicate: (key < 10) (type: boolean)
-                    Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+                    predicate: ((key < 10) and enforce_constraint((CAST( key AS decimal(5,2)) is not null and (CAST( key AS decimal(5,2)) >= CAST( UDFToInteger(key) AS decimal(5,2))) is not false))) (type: boolean)
+                    Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: UDFToInteger(key) (type: int), CAST( key AS decimal(5,2)) (type: decimal(5,2)), value (type: string)
                       outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 166 Data size: 34362 Basic stats: COMPLETE Column stats: COMPLETE
-                      Filter Operator
-                        predicate: enforce_constraint((_col1 is not null and (_col1 >= CAST( _col0 AS decimal(5,2))) is not false)) (type: boolean)
+                      Statistics: Num rows: 83 Data size: 17181 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        sort order: 
+                        Map-reduce partition columns: _col0 (type: int)
                         Statistics: Num rows: 83 Data size: 17181 Basic stats: COMPLETE Column stats: COMPLETE
-                        Reduce Output Operator
-                          sort order: 
-                          Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 83 Data size: 17181 Basic stats: COMPLETE Column stats: COMPLETE
-                          value expressions: _col0 (type: int), _col1 (type: decimal(5,2)), _col2 (type: string)
+                        value expressions: _col0 (type: int), _col1 (type: decimal(5,2)), _col2 (type: string)
                   Filter Operator
                     predicate: ((key < 20) and (key > 10) and enforce_constraint(value is not null)) (type: boolean)
                     Statistics: Num rows: 27 Data size: 4806 Basic stats: COMPLETE Column stats: COMPLETE

http://git-wip-us.apache.org/repos/asf/hive/blob/34adf31a/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out b/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out
index 5a3f519..5e766c8 100644
--- a/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out
+++ b/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out
@@ -3424,20 +3424,17 @@ STAGE PLANS:
                   alias: src
                   Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
-                    predicate: (key < 10) (type: boolean)
-                    Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+                    predicate: ((key < 10) and enforce_constraint((UDFToInteger(key) is not null and value is not null))) (type: boolean)
+                    Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: UDFToInteger(key) (type: int), CAST( key AS decimal(5,2)) (type: decimal(5,2)), value (type: string)
                       outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 166 Data size: 34362 Basic stats: COMPLETE Column stats: COMPLETE
-                      Filter Operator
-                        predicate: enforce_constraint((_col0 is not null and _col2 is not null)) (type: boolean)
+                      Statistics: Num rows: 83 Data size: 17181 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        sort order: 
+                        Map-reduce partition columns: _col0 (type: int)
                         Statistics: Num rows: 83 Data size: 17181 Basic stats: COMPLETE Column stats: COMPLETE
-                        Reduce Output Operator
-                          sort order: 
-                          Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 83 Data size: 17181 Basic stats: COMPLETE Column stats: COMPLETE
-                          value expressions: _col0 (type: int), _col1 (type: decimal(5,2)), _col2 (type: string)
+                        value expressions: _col0 (type: int), _col1 (type: decimal(5,2)), _col2 (type: string)
                   Filter Operator
                     predicate: ((key < 20) and (key > 10) and enforce_constraint(value is not null)) (type: boolean)
                     Statistics: Num rows: 27 Data size: 4806 Basic stats: COMPLETE Column stats: COMPLETE

http://git-wip-us.apache.org/repos/asf/hive/blob/34adf31a/ql/src/test/results/clientpositive/llap/lineage3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/lineage3.q.out b/ql/src/test/results/clientpositive/llap/lineage3.q.out
index 27dd874..e05d452 100644
--- a/ql/src/test/results/clientpositive/llap/lineage3.q.out
+++ b/ql/src/test/results/clientpositive/llap/lineage3.q.out
@@ -180,7 +180,7 @@ PREHOOK: Input: default@src1
 #### A masked pattern was here ####
 {"version":"1.0","engine":"tez","database":"default","hash":"94e9cc0a67801fe1503a3cb0c5029d59","queryText":"select * from src1 a\nwhere exists\n  (select cint from alltypesorc b\n   where a.key = b.ctinyint + 300)\nand key > 300","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(a.key) > 300.0D)","edgeType":"PREDICATE"},{"sources":[2],"targets":[0,1],"expression":"(a.key = a.key)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1],"expression":"b.ctinyint is not null","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(UDFToDouble((UDFToInteger(b.ctinyint) + 300)) = UDFToDouble(a.key))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"a.key"},{"id":1,"vertexType":"COLUMN","vertexId":"a.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1
 .value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"}]}
 311	val_311
-Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[34][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
 PREHOOK: query: select key, value from src1
 where key not in (select key+18 from src1) order by key
 PREHOOK: type: QUERY

http://git-wip-us.apache.org/repos/asf/hive/blob/34adf31a/ql/src/test/results/clientpositive/llap/subquery_in.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/subquery_in.q.out b/ql/src/test/results/clientpositive/llap/subquery_in.q.out
index 8f3912e..8007025 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_in.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_in.q.out
@@ -4575,23 +4575,19 @@ STAGE PLANS:
                   predicate: (_col1 is not null and _col2 is not null) (type: boolean)
                   Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
                   Select Operator
-                    expressions: (_col1 / _col2) (type: double), _col0 (type: int)
+                    expressions: _col0 (type: int), (_col1 / _col2) (type: double)
                     outputColumnNames: _col0, _col1
                     Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE
-                    Select Operator
-                      expressions: _col1 (type: int), _col0 (type: double)
+                    Group By Operator
+                      keys: _col0 (type: int), _col1 (type: double)
+                      mode: hash
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE
-                      Group By Operator
-                        keys: _col0 (type: int), _col1 (type: double)
-                        mode: hash
-                        outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: double)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: double)
                         Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int), _col1 (type: double)
-                          sort order: ++
-                          Map-reduce partition columns: _col0 (type: int), _col1 (type: double)
-                          Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
 
   Stage: Stage-0
     Fetch Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/34adf31a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
index 469ec69..11facd1 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
@@ -1416,7 +1416,7 @@ POSTHOOK: Input: default@t1_v
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@T2_v
 POSTHOOK: Lineage: T2_v.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-Warning: Shuffle Join MERGEJOIN[33][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
 PREHOOK: query: explain
 select * 
 from T1_v where T1_v.key not in (select T2_v.key from T2_v)
@@ -1478,7 +1478,7 @@ STAGE PLANS:
                     Select Operator
                       expressions: CASE WHEN ((key > '104')) THEN (null) ELSE (key) END (type: string)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 83 Data size: 15272 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE
                       Group By Operator
                         keys: _col0 (type: string)
                         mode: hash
@@ -1570,7 +1570,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Shuffle Join MERGEJOIN[33][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
 PREHOOK: query: select * 
 from T1_v where T1_v.key not in (select T2_v.key from T2_v)
 PREHOOK: type: QUERY
@@ -2427,7 +2427,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@part
 #### A masked pattern was here ####
 26
-Warning: Shuffle Join MERGEJOIN[35][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
 PREHOOK: query: explain select * from part  where floor(p_retailprice) NOT IN (select floor(min(p_retailprice)) from part group by p_type)
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select * from part  where floor(p_retailprice) NOT IN (select floor(min(p_retailprice)) from part group by p_type)
@@ -2550,16 +2550,24 @@ STAGE PLANS:
                       sort order: 
                       Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                       value expressions: _col0 (type: bigint), _col1 (type: bigint)
-                  Group By Operator
-                    keys: _col0 (type: bigint)
-                    mode: hash
+                Select Operator
+                  expressions: _col1 (type: double)
+                  outputColumnNames: _col1
+                  Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: floor(_col1) (type: bigint)
                     outputColumnNames: _col0
-                    Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: bigint)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: bigint)
+                    Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      keys: _col0 (type: bigint)
+                      mode: hash
+                      outputColumnNames: _col0
                       Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: bigint)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: bigint)
+                        Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
         Reducer 6 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
@@ -2597,7 +2605,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Shuffle Join MERGEJOIN[35][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
 PREHOOK: query: select * from part  where floor(p_retailprice) NOT IN (select floor(min(p_retailprice)) from part group by p_type)
 PREHOOK: type: QUERY
 PREHOOK: Input: default@part
@@ -3106,7 +3114,7 @@ STAGE PLANS:
                     Select Operator
                       expressions: (UDFToDouble(p_type) + 2.0D) (type: double), p_brand (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 26 Data size: 2600 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE
                       Group By Operator
                         keys: _col0 (type: double), _col1 (type: string)
                         mode: hash
@@ -3223,11 +3231,12 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Edges:
         Reducer 10 <- Map 9 (SIMPLE_EDGE)
+        Reducer 11 <- Map 9 (SIMPLE_EDGE)
         Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE)
         Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (ONE_TO_ONE_EDGE)
         Reducer 5 <- Map 4 (SIMPLE_EDGE), Reducer 10 (ONE_TO_ONE_EDGE)
         Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
-        Reducer 7 <- Map 4 (SIMPLE_EDGE), Reducer 10 (ONE_TO_ONE_EDGE)
+        Reducer 7 <- Map 4 (SIMPLE_EDGE), Reducer 11 (ONE_TO_ONE_EDGE)
         Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
@@ -3286,7 +3295,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: part
-                  filterExpr: p_size is not null (type: boolean)
+                  filterExpr: (p_size is not null or p_size is not null) (type: boolean)
                   Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: p_size is not null (type: boolean)
@@ -3305,6 +3314,23 @@ STAGE PLANS:
                           sort order: +
                           Map-reduce partition columns: _col0 (type: int)
                           Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: p_size is not null (type: boolean)
+                    Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: (p_size + 1) (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        keys: _col0 (type: int)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
         Reducer 10 
@@ -3320,6 +3346,14 @@ STAGE PLANS:
                   sort order: +
                   Map-reduce partition columns: _col0 (type: int)
                   Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE
+        Reducer 11 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: int)
                   sort order: +
@@ -3822,7 +3856,7 @@ STAGE PLANS:
                     Select Operator
                       expressions: concat('v', value) (type: string), key (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 500 Data size: 135500 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
                       Group By Operator
                         keys: _col1 (type: string), _col0 (type: string)
                         mode: hash

http://git-wip-us.apache.org/repos/asf/hive/blob/34adf31a/ql/src/test/results/clientpositive/masking_disablecbo_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/masking_disablecbo_2.q.out b/ql/src/test/results/clientpositive/masking_disablecbo_2.q.out
index fad0120..5a70e00 100644
--- a/ql/src/test/results/clientpositive/masking_disablecbo_2.q.out
+++ b/ql/src/test/results/clientpositive/masking_disablecbo_2.q.out
@@ -24,25 +24,22 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: src
+            filterExpr: (((UDFToInteger(key) % 2) = 0) and (UDFToInteger(key) < 10)) (type: boolean)
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: UDFToInteger(key) (type: int), value (type: string)
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              Filter Operator
-                predicate: (((_col0 % 2) = 0) and (_col0 < 10)) (type: boolean)
+            Filter Operator
+              predicate: (((UDFToInteger(key) % 2) = 0) and (UDFToInteger(key) < 10)) (type: boolean)
+              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: UDFToInteger(key) (type: int), reverse(value) (type: string)
+                outputColumnNames: _col0, _col1
                 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: int), reverse(_col1) (type: string)
-                  outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
                   Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
 
   Stage: Stage-0
@@ -81,25 +78,22 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: src
+            filterExpr: (((UDFToInteger(key) % 2) = 0) and (UDFToInteger(key) < 10) and (UDFToInteger(key) > 0)) (type: boolean)
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: UDFToInteger(key) (type: int), value (type: string)
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              Filter Operator
-                predicate: (((_col0 % 2) = 0) and (_col0 < 10) and (_col0 > 0)) (type: boolean)
+            Filter Operator
+              predicate: (((UDFToInteger(key) % 2) = 0) and (UDFToInteger(key) < 10) and (UDFToInteger(key) > 0)) (type: boolean)
+              Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: UDFToInteger(key) (type: int), reverse(value) (type: string)
+                outputColumnNames: _col0, _col1
                 Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: int), reverse(_col1) (type: string)
-                  outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
                   Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
 
   Stage: Stage-0
@@ -135,27 +129,21 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: src
+            filterExpr: (((UDFToInteger(key) % 2) = 0) and (UDFToInteger(key) < 10) and reverse(value) is not null and (UDFToInteger(key) > 0)) (type: boolean)
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: UDFToInteger(key) (type: int), value (type: string)
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              Filter Operator
-                predicate: (((_col0 % 2) = 0) and (_col0 < 10) and (_col0 > 0)) (type: boolean)
+            Filter Operator
+              predicate: (((UDFToInteger(key) % 2) = 0) and (UDFToInteger(key) < 10) and (UDFToInteger(key) > 0) and reverse(value) is not null) (type: boolean)
+              Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: UDFToInteger(key) (type: int), reverse(value) (type: string)
+                outputColumnNames: _col0, _col1
                 Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: int), reverse(_col1) (type: string)
-                  outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions: _col1 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col1 (type: string)
                   Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: _col1 is not null (type: boolean)
-                    Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col1 (type: string)
-                      sort order: +
-                      Map-reduce partition columns: _col1 (type: string)
-                      Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: int)
+                  value expressions: _col0 (type: int)
           TableScan
             alias: a
             filterExpr: key is not null (type: boolean)
@@ -210,47 +198,38 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: src
+            filterExpr: (((UDFToInteger(key) % 2) = 0) and (UDFToInteger(key) < 10) and UDFToDouble(UDFToInteger(key)) is not null) (type: boolean)
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: UDFToInteger(key) (type: int), value (type: string)
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              Filter Operator
-                predicate: (((_col0 % 2) = 0) and (_col0 < 10) and UDFToDouble(_col0) is not null) (type: boolean)
+            Filter Operator
+              predicate: (((UDFToInteger(key) % 2) = 0) and (UDFToInteger(key) < 10) and UDFToDouble(UDFToInteger(key)) is not null) (type: boolean)
+              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: UDFToInteger(key) (type: int), reverse(value) (type: string)
+                outputColumnNames: _col0, _col1
                 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: int), reverse(_col1) (type: string)
-                  outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions: UDFToDouble(_col0) (type: double)
+                  sort order: +
+                  Map-reduce partition columns: UDFToDouble(_col0) (type: double)
                   Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: UDFToDouble(_col0) (type: double)
-                    sort order: +
-                    Map-reduce partition columns: UDFToDouble(_col0) (type: double)
-                    Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col0 (type: int), _col1 (type: string)
+                  value expressions: _col0 (type: int), _col1 (type: string)
           TableScan
             alias: src
+            filterExpr: (((UDFToInteger(key) % 2) = 0) and (UDFToInteger(key) < 10) and UDFToDouble(reverse(value)) is not null and (UDFToInteger(key) > 0)) (type: boolean)
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: UDFToInteger(key) (type: int), value (type: string)
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              Filter Operator
-                predicate: (((_col0 % 2) = 0) and (_col0 < 10) and (_col0 > 0)) (type: boolean)
+            Filter Operator
+              predicate: (((UDFToInteger(key) % 2) = 0) and (UDFToInteger(key) < 10) and (UDFToInteger(key) > 0) and UDFToDouble(reverse(value)) is not null) (type: boolean)
+              Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: UDFToInteger(key) (type: int), reverse(value) (type: string)
+                outputColumnNames: _col0, _col1
                 Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: int), reverse(_col1) (type: string)
-                  outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions: UDFToDouble(_col1) (type: double)
+                  sort order: +
+                  Map-reduce partition columns: UDFToDouble(_col1) (type: double)
                   Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: UDFToDouble(_col1) is not null (type: boolean)
-                    Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: UDFToDouble(_col1) (type: double)
-                      sort order: +
-                      Map-reduce partition columns: UDFToDouble(_col1) (type: double)
-                      Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: int)
+                  value expressions: _col0 (type: int)
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -288,56 +267,50 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: src
+            filterExpr: (((UDFToInteger(key) % 2) = 0) and (UDFToInteger(key) < 10)) (type: boolean)
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: UDFToInteger(key) (type: int), value (type: string)
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              Filter Operator
-                predicate: (((_col0 % 2) = 0) and (_col0 < 10)) (type: boolean)
+            Filter Operator
+              predicate: (((UDFToInteger(key) % 2) = 0) and (UDFToInteger(key) < 10)) (type: boolean)
+              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: UDFToInteger(key) (type: int), reverse(value) (type: string)
+                outputColumnNames: _col0, _col1
                 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: int), reverse(_col1) (type: string)
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-                  Union
+                Union
+                  Statistics: Num rows: 110 Data size: 1167 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    keys: _col0 (type: int), _col1 (type: string)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
                     Statistics: Num rows: 110 Data size: 1167 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      keys: _col0 (type: int), _col1 (type: string)
-                      mode: hash
-                      outputColumnNames: _col0, _col1
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int), _col1 (type: string)
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
                       Statistics: Num rows: 110 Data size: 1167 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int), _col1 (type: string)
-                        sort order: ++
-                        Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
-                        Statistics: Num rows: 110 Data size: 1167 Basic stats: COMPLETE Column stats: NONE
           TableScan
             alias: src
+            filterExpr: (((UDFToInteger(key) % 2) = 0) and (UDFToInteger(key) < 10) and (UDFToInteger(key) > 0)) (type: boolean)
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: UDFToInteger(key) (type: int), value (type: string)
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              Filter Operator
-                predicate: (((_col0 % 2) = 0) and (_col0 < 10) and (_col0 > 0)) (type: boolean)
+            Filter Operator
+              predicate: (((UDFToInteger(key) % 2) = 0) and (UDFToInteger(key) < 10) and (UDFToInteger(key) > 0)) (type: boolean)
+              Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: UDFToInteger(key) (type: int), reverse(value) (type: string)
+                outputColumnNames: _col0, _col1
                 Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: int), reverse(_col1) (type: string)
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
-                  Union
+                Union
+                  Statistics: Num rows: 110 Data size: 1167 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    keys: _col0 (type: int), _col1 (type: string)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
                     Statistics: Num rows: 110 Data size: 1167 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      keys: _col0 (type: int), _col1 (type: string)
-                      mode: hash
-                      outputColumnNames: _col0, _col1
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int), _col1 (type: string)
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
                       Statistics: Num rows: 110 Data size: 1167 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int), _col1 (type: string)
-                        sort order: ++
-                        Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
-                        Statistics: Num rows: 110 Data size: 1167 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Group By Operator
           keys: KEY._col0 (type: int), KEY._col1 (type: string)

http://git-wip-us.apache.org/repos/asf/hive/blob/34adf31a/ql/src/test/results/clientpositive/perf/tez/query8.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query8.q.out b/ql/src/test/results/clientpositive/perf/tez/query8.q.out
index d9b82b4..26c7d8b 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query8.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query8.q.out
@@ -234,150 +234,150 @@ Stage-0
     limit:100
     Stage-1
       Reducer 5 vectorized
-      File Output Operator [FS_152]
-        Limit [LIM_151] (rows=100 width=88)
+      File Output Operator [FS_150]
+        Limit [LIM_149] (rows=100 width=88)
           Number of rows:100
-          Select Operator [SEL_150] (rows=348477374 width=88)
+          Select Operator [SEL_148] (rows=348477374 width=88)
             Output:["_col0","_col1"]
           <-Reducer 4 [SIMPLE_EDGE] vectorized
-            SHUFFLE [RS_149]
-              Group By Operator [GBY_148] (rows=348477374 width=88)
+            SHUFFLE [RS_147]
+              Group By Operator [GBY_146] (rows=348477374 width=88)
                 Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
               <-Reducer 3 [SIMPLE_EDGE]
                 SHUFFLE [RS_57]
                   PartitionCols:_col0
                   Group By Operator [GBY_56] (rows=696954748 width=88)
                     Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col8
-                    Merge Join Operator [MERGEJOIN_119] (rows=696954748 width=88)
+                    Merge Join Operator [MERGEJOIN_117] (rows=696954748 width=88)
                       Conds:RS_52._col1=RS_53._col1(Inner),Output:["_col2","_col8"]
                     <-Reducer 12 [SIMPLE_EDGE]
                       SHUFFLE [RS_53]
                         PartitionCols:_col1
-                        Merge Join Operator [MERGEJOIN_118] (rows=1874 width=1911)
-                          Conds:RS_139.substr(_col0, 1, 2)=RS_142.substr(_col2, 1, 2)(Inner),Output:["_col1","_col2"]
+                        Merge Join Operator [MERGEJOIN_116] (rows=1874 width=1911)
+                          Conds:RS_137.substr(_col0, 1, 2)=RS_140.substr(_col2, 1, 2)(Inner),Output:["_col1","_col2"]
                         <-Map 19 [SIMPLE_EDGE] vectorized
-                          SHUFFLE [RS_142]
+                          SHUFFLE [RS_140]
                             PartitionCols:substr(_col2, 1, 2)
-                            Select Operator [SEL_141] (rows=1704 width=1910)
+                            Select Operator [SEL_139] (rows=1704 width=1910)
                               Output:["_col0","_col1","_col2"]
-                              Filter Operator [FIL_140] (rows=1704 width=1910)
+                              Filter Operator [FIL_138] (rows=1704 width=1910)
                                 predicate:(s_store_sk is not null and substr(s_zip, 1, 2) is not null)
                                 TableScan [TS_42] (rows=1704 width=1910)
                                   default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name","s_zip"]
                         <-Reducer 11 [SIMPLE_EDGE] vectorized
-                          SHUFFLE [RS_139]
+                          SHUFFLE [RS_137]
                             PartitionCols:substr(_col0, 1, 2)
-                            Select Operator [SEL_138] (rows=1 width=1014)
+                            Select Operator [SEL_136] (rows=1 width=1014)
                               Output:["_col0"]
-                              Filter Operator [FIL_137] (rows=1 width=1014)
+                              Filter Operator [FIL_135] (rows=1 width=1014)
                                 predicate:(_col1 = 2L)
-                                Group By Operator [GBY_136] (rows=6833333 width=1014)
+                                Group By Operator [GBY_134] (rows=6833333 width=1014)
                                   Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0
                                 <-Union 10 [SIMPLE_EDGE]
                                   <-Reducer 17 [CONTAINS] vectorized
-                                    Reduce Output Operator [RS_173]
+                                    Reduce Output Operator [RS_171]
                                       PartitionCols:_col0
-                                      Group By Operator [GBY_172] (rows=13666666 width=1014)
+                                      Group By Operator [GBY_170] (rows=13666666 width=1014)
                                         Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0
-                                        Group By Operator [GBY_171] (rows=3666666 width=1014)
+                                        Group By Operator [GBY_169] (rows=3666666 width=1014)
                                           Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0
                                         <-Reducer 16 [SIMPLE_EDGE] vectorized
-                                          SHUFFLE [RS_170]
+                                          SHUFFLE [RS_168]
                                             PartitionCols:_col0
-                                            Group By Operator [GBY_169] (rows=7333333 width=1014)
+                                            Group By Operator [GBY_167] (rows=7333333 width=1014)
                                               Output:["_col0","_col1"],aggregations:["count()"],keys:_col0
-                                              Select Operator [SEL_168] (rows=7333333 width=1014)
+                                              Select Operator [SEL_166] (rows=7333333 width=1014)
                                                 Output:["_col0"]
-                                                Filter Operator [FIL_167] (rows=7333333 width=1014)
+                                                Filter Operator [FIL_165] (rows=7333333 width=1014)
                                                   predicate:(_col1 > 10L)
-                                                  Group By Operator [GBY_166] (rows=22000000 width=1014)
+                                                  Group By Operator [GBY_164] (rows=22000000 width=1014)
                                                     Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0
                                                   <-Reducer 15 [SIMPLE_EDGE]
                                                     SHUFFLE [RS_25]
                                                       PartitionCols:_col0
                                                       Group By Operator [GBY_24] (rows=44000000 width=1014)
                                                         Output:["_col0","_col1"],aggregations:["count()"],keys:_col1
-                                                        Merge Join Operator [MERGEJOIN_117] (rows=44000000 width=1014)
-                                                          Conds:RS_162._col0=RS_165._col0(Inner),Output:["_col1"]
+                                                        Merge Join Operator [MERGEJOIN_115] (rows=44000000 width=1014)
+                                                          Conds:RS_160._col0=RS_163._col0(Inner),Output:["_col1"]
                                                         <-Map 14 [SIMPLE_EDGE] vectorized
-                                                          SHUFFLE [RS_162]
+                                                          SHUFFLE [RS_160]
                                                             PartitionCols:_col0
-                                                            Select Operator [SEL_161] (rows=40000000 width=1014)
+                                                            Select Operator [SEL_159] (rows=40000000 width=1014)
                                                               Output:["_col0","_col1"]
-                                                              Filter Operator [FIL_160] (rows=40000000 width=1014)
+                                                              Filter Operator [FIL_158] (rows=40000000 width=1014)
                                                                 predicate:(ca_address_sk is not null and substr(substr(ca_zip, 1, 5), 1, 2) is not null)
                                                                 TableScan [TS_14] (rows=40000000 width=1014)
                                                                   default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_zip"]
                                                         <-Map 18 [SIMPLE_EDGE] vectorized
-                                                          SHUFFLE [RS_165]
+                                                          SHUFFLE [RS_163]
                                                             PartitionCols:_col0
-                                                            Select Operator [SEL_164] (rows=40000000 width=860)
+                                                            Select Operator [SEL_162] (rows=40000000 width=860)
                                                               Output:["_col0"]
-                                                              Filter Operator [FIL_163] (rows=40000000 width=860)
+                                                              Filter Operator [FIL_161] (rows=40000000 width=860)
                                                                 predicate:((c_preferred_cust_flag = 'Y') and c_current_addr_sk is not null)
                                                                 TableScan [TS_17] (rows=80000000 width=860)
                                                                   default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_current_addr_sk","c_preferred_cust_flag"]
                                   <-Reducer 9 [CONTAINS] vectorized
-                                    Reduce Output Operator [RS_159]
+                                    Reduce Output Operator [RS_157]
                                       PartitionCols:_col0
-                                      Group By Operator [GBY_158] (rows=13666666 width=1014)
+                                      Group By Operator [GBY_156] (rows=13666666 width=1014)
                                         Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0
-                                        Group By Operator [GBY_157] (rows=10000000 width=1014)
+                                        Group By Operator [GBY_155] (rows=10000000 width=1014)
                                           Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0
                                         <-Map 8 [SIMPLE_EDGE] vectorized
-                                          SHUFFLE [RS_156]
+                                          SHUFFLE [RS_154]
                                             PartitionCols:_col0
-                                            Group By Operator [GBY_155] (rows=20000000 width=1014)
+                                            Group By Operator [GBY_153] (rows=20000000 width=1014)
                                               Output:["_col0","_col1"],aggregations:["count()"],keys:_col0
-                                              Select Operator [SEL_154] (rows=20000000 width=1014)
+                                              Select Operator [SEL_152] (rows=20000000 width=1014)
                                                 Output:["_col0"]
-                                                Filter Operator [FIL_153] (rows=20000000 width=1014)
+                                                Filter Operator [FIL_151] (rows=20000000 width=1014)
                                                   predicate:((substr(ca_zip, 1, 5)) IN ('89436', '30868', '65085', '22977', '83927', '77557', '58429', '40697', '80614', '10502', '32779', '91137', '61265', '98294', '17921', '18427', '21203', '59362', '87291', '84093', '21505', '17184', '10866', '67898', '25797', '28055', '18377', '80332', '74535', '21757', '29742', '90885', '29898', '17819', '40811', '25990', '47513', '89531', '91068', '10391', '18846', '99223', '82637', '41368', '83658', '86199', '81625', '26696', '89338', '88425', '32200', '81427', '19053', '77471', '36610', '99823', '43276', '41249', '48584', '83550', '82276', '18842', '78890', '14090', '38123', '40936', '34425', '19850', '43286', '80072', '79188', '54191', '11395', '50497', '84861', '90733', '21068', '57666', '37119', '25004', '57835', '70067', '62878', '95806', '19303', '18840', '19124', '29785', '16737', '16022', '49613', '89977', '68310', '60069', '98360', '48649', '39050', '41793', '25002', '27413', '39736', 
 '47208', '16515', '94808', '57648', '15009', '80015', '42961', '63982', '21744', '71853', '81087', '67468', '34175', '64008', '20261', '11201', '51799', '48043', '45645', '61163', '48375', '36447', '57042', '21218', '41100', '89951', '22745', '35851', '83326', '61125', '78298', '80752', '49858', '52940', '96976', '63792', '11376', '53582', '18717', '90226', '50530', '94203', '99447', '27670', '96577', '57856', '56372', '16165', '23427', '54561', '28806', '44439', '22926', '30123', '61451', '92397', '56979', '92309', '70873', '13355', '21801', '46346', '37562', '56458', '28286', '47306', '99555', '69399', '26234', '47546', '49661', '88601', '35943', '39936', '25632', '24611', '44166', '56648', '30379', '59785', '11110', '14329', '93815', '52226', '71381', '13842', '25612', '63294', '14664', '21077', '82626', '18799', '60915', '81020', '56447', '76619', '11433', '13414', '42548', '92713', '70467', '30884', '47484', '16072', '38936', '13036', '88376', '45539', '35901', '19506', '65690'
 , '73957', '71850', '49231', '14276', '20005', '18384', '76615', '11635', '38177', '55607', '41369', '95447', '58581', '58149', '91946', '33790', '76232', '75692', '95464', '22246', '51061', '56692', '53121', '77209', '15482', '10688', '14868', '45907', '73520', '72666', '25734', '17959', '24677', '66446', '94627', '53535', '15560', '41967', '69297', '11929', '59403', '33283', '52232', '57350', '43933', '40921', '36635', '10827', '71286', '19736', '80619', '25251', '95042', '15526', '36496', '55854', '49124', '81980', '35375', '49157', '63512', '28944', '14946', '36503', '54010', '18767', '23969', '43905', '66979', '33113', '21286', '58471', '59080', '13395', '79144', '70373', '67031', '38360', '26705', '50906', '52406', '26066', '73146', '15884', '31897', '30045', '61068', '45550', '92454', '13376', '14354', '19770', '22928', '97790', '50723', '46081', '30202', '14410', '20223', '88500', '67298', '13261', '14172', '81410', '93578', '83583', '46047', '94167', '82564', '21156', '1579
 9', '86709', '37931', '74703', '83103', '23054', '70470', '72008', '49247', '91911', '69998', '20961', '70070', '63197', '54853', '88191', '91830', '49521', '19454', '81450', '89091', '62378', '25683', '61869', '51744', '36580', '85778', '36871', '48121', '28810', '83712', '45486', '67393', '26935', '42393', '20132', '55349', '86057', '21309', '80218', '10094', '11357', '48819', '39734', '40758', '30432', '21204', '29467', '30214', '61024', '55307', '74621', '11622', '68908', '33032', '52868', '99194', '99900', '84936', '69036', '99149', '45013', '32895', '59004', '32322', '14933', '32936', '33562', '72550', '27385', '58049', '58200', '16808', '21360', '32961', '18586', '79307', '15492') and substr(substr(ca_zip, 1, 5), 1, 2) is not null)
                                                   TableScan [TS_6] (rows=40000000 width=1014)
                                                     default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_zip"]
                     <-Reducer 2 [SIMPLE_EDGE]
                       SHUFFLE [RS_52]
                         PartitionCols:_col1
-                        Merge Join Operator [MERGEJOIN_116] (rows=633595212 width=88)
-                          Conds:RS_147._col0=RS_130._col0(Inner),Output:["_col1","_col2"]
+                        Merge Join Operator [MERGEJOIN_114] (rows=633595212 width=88)
+                          Conds:RS_145._col0=RS_128._col0(Inner),Output:["_col1","_col2"]
                         <-Map 6 [SIMPLE_EDGE] vectorized
-                          PARTITION_ONLY_SHUFFLE [RS_130]
+                          PARTITION_ONLY_SHUFFLE [RS_128]
                             PartitionCols:_col0
-                            Select Operator [SEL_129] (rows=18262 width=1119)
+                            Select Operator [SEL_127] (rows=18262 width=1119)
                               Output:["_col0"]
-                              Filter Operator [FIL_128] (rows=18262 width=1119)
+                              Filter Operator [FIL_126] (rows=18262 width=1119)
                                 predicate:((d_qoy = 1) and (d_year = 2002) and d_date_sk is not null)
                                 TableScan [TS_3] (rows=73049 width=1119)
                                   default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_qoy"]
                         <-Map 1 [SIMPLE_EDGE] vectorized
-                          SHUFFLE [RS_147]
+                          SHUFFLE [RS_145]
                             PartitionCols:_col0
-                            Select Operator [SEL_146] (rows=575995635 width=88)
+                            Select Operator [SEL_144] (rows=575995635 width=88)
                               Output:["_col0","_col1","_col2"]
-                              Filter Operator [FIL_145] (rows=575995635 width=88)
+                              Filter Operator [FIL_143] (rows=575995635 width=88)
                                 predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_50_date_dim_d_date_sk_min) AND DynamicValue(RS_50_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_50_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_53_store_s_store_sk_min) AND DynamicValue(RS_53_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_53_store_s_store_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null)
                                 TableScan [TS_0] (rows=575995635 width=88)
                                   default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"]
                                 <-Reducer 13 [BROADCAST_EDGE] vectorized
-                                  BROADCAST [RS_144]
-                                    Group By Operator [GBY_143] (rows=1 width=12)
+                                  BROADCAST [RS_142]
+                                    Group By Operator [GBY_141] (rows=1 width=12)
                                       Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
                                     <-Reducer 12 [CUSTOM_SIMPLE_EDGE]
-                                      SHUFFLE [RS_93]
-                                        Group By Operator [GBY_92] (rows=1 width=12)
+                                      SHUFFLE [RS_91]
+                                        Group By Operator [GBY_90] (rows=1 width=12)
                                           Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
-                                          Select Operator [SEL_91] (rows=1874 width=1911)
+                                          Select Operator [SEL_89] (rows=1874 width=1911)
                                             Output:["_col0"]
-                                             Please refer to the previous Merge Join Operator [MERGEJOIN_118]
+                                             Please refer to the previous Merge Join Operator [MERGEJOIN_116]
                                 <-Reducer 7 [BROADCAST_EDGE] vectorized
-                                  BROADCAST [RS_135]
-                                    Group By Operator [GBY_134] (rows=1 width=12)
+                                  BROADCAST [RS_133]
+                                    Group By Operator [GBY_132] (rows=1 width=12)
                                       Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
                                     <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized
-                                      PARTITION_ONLY_SHUFFLE [RS_133]
-                                        Group By Operator [GBY_132] (rows=1 width=12)
+                                      PARTITION_ONLY_SHUFFLE [RS_131]
+                                        Group By Operator [GBY_130] (rows=1 width=12)
                                           Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
-                                          Select Operator [SEL_131] (rows=18262 width=1119)
+                                          Select Operator [SEL_129] (rows=18262 width=1119)
                                             Output:["_col0"]
-                                             Please refer to the previous Select Operator [SEL_129]
+                                             Please refer to the previous Select Operator [SEL_127]