Posted to commits@hive.apache.org by ha...@apache.org on 2018/08/06 05:47:46 UTC
[1/9] hive git commit: HIVE-19097 : related equals and in operators may cause inaccurate stats estimations (Zoltan Haindrich via Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master 27bdbdada -> 20c95c1c0
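The updated `Statistics: Num rows` values below reflect the problem named in the subject: when equality and IN predicates over the same columns are treated as independent, multiplying their selectivities double-counts the reduction and underestimates row counts. A minimal sketch of that effect (this is not Hive's actual estimator; the helper names, NDV heuristics, and numbers are illustrative assumptions):

```python
# Hypothetical selectivity estimator illustrating the bug class in HIVE-19097.
# Not Hive's real StatsRulesProcFactory code; all factors are assumptions.

def eq_selectivity(ndv):
    # common heuristic: col = const matches ~1/NDV of the rows
    return 1.0 / ndv

def in_selectivity(num_values, ndv):
    # col IN (v1..vk) matches ~k/NDV of the rows, capped at 1
    return min(num_values / ndv, 1.0)

def naive_rows(total_rows, selectivities):
    # multiplying per-predicate selectivities assumes independence; for
    # overlapping predicates like key IN (100, 150, 200) AND key = 150 this
    # double-counts the reduction and underestimates the output rows
    est = total_rows
    for s in selectivities:
        est *= s
    return max(int(est), 1)

total, ndv = 500, 50

# related IN + equals on the same column: naive product vs. treating the
# redundant pair as a single predicate
naive = naive_rows(total, [in_selectivity(3, ndv), eq_selectivity(ndv)])
safer = naive_rows(total, [eq_selectivity(ndv)])
print(naive, safer)  # naive estimate is far smaller than the safer one
```

This is why the expected plans below move from aggressively reduced counts (e.g. `Num rows: 63`) back toward the input cardinality (`Num rows: 500`) when column stats are NONE.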
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out b/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out
index 7eff987d..c8cb431 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out
@@ -58,33 +58,33 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (((value) IN ('val_100', 'val_200', 'val_300') and (key) IN (100, 150, 200)) or ((value) IN ('val_400', 'val_500') and (key) IN (400, 450))) (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: key (type: string)
sort order: +
Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
value expressions: value (type: string)
Reducer 2
Reduce Operator Tree:
Forward
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((KEY._col0) IN (100, 150, 200) and (VALUE._col0) IN ('val_100', 'val_200', 'val_300')) (type: boolean)
- Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
keys: KEY._col0 (type: string)
mode: complete
outputColumnNames: _col0, _col1
- Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), UDFToInteger(_col1) (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -92,20 +92,20 @@ STAGE PLANS:
name: default.e1_n1
Filter Operator
predicate: ((KEY._col0) IN (400, 450) and (VALUE._col0) IN ('val_400', 'val_500')) (type: boolean)
- Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
keys: KEY._col0 (type: string)
mode: complete
outputColumnNames: _col0, _col1
- Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), UDFToInteger(_col1) (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -408,33 +408,33 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (((value) IN ('val_100', 'val_200', 'val_300') and (key) IN (100, 150, 200)) or ((value) IN ('val_400', 'val_500') and (key) IN (400, 450))) (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: key (type: string)
sort order: +
Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
value expressions: value (type: string)
Reducer 2
Reduce Operator Tree:
Forward
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((KEY._col0) IN (100, 150, 200) and (VALUE._col0) IN ('val_100', 'val_200', 'val_300')) (type: boolean)
- Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
keys: KEY._col0 (type: string)
mode: complete
outputColumnNames: _col0, _col1
- Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), UDFToInteger(_col1) (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -442,20 +442,20 @@ STAGE PLANS:
name: default.e1_n1
Filter Operator
predicate: ((KEY._col0) IN (400, 450) and (VALUE._col0) IN ('val_400', 'val_500')) (type: boolean)
- Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
keys: KEY._col0 (type: string)
mode: complete
outputColumnNames: _col0, _col1
- Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), UDFToInteger(_col1) (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out
index 30b5a2e..9b34b25 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out
@@ -30726,17 +30726,17 @@ STAGE PLANS:
Filter Operator
isSamplingPred: false
predicate: (struct(cint,cfloat)) IN (const struct(49,3.5), const struct(47,2.09), const struct(45,3.02)) (type: boolean)
- Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9216 Data size: 110592 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9216 Data size: 110592 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9216 Data size: 110592 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -30963,19 +30963,19 @@ STAGE PLANS:
Filter Operator
isSamplingPred: false
predicate: (cstring1) IN ('biology', 'history', 'topology') (type: boolean)
- Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
keys: cstring1 (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
null sort order: a
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
tag: -1
value expressions: _col1 (type: bigint)
auto parallelism: false
@@ -31042,16 +31042,16 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: bigint), _col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col1 (type: string)
null sort order: a
sort order: +
- Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
tag: -1
value expressions: _col0 (type: bigint)
auto parallelism: false
@@ -31062,13 +31062,13 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/spark/pcr.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/pcr.q.out b/ql/src/test/results/clientpositive/spark/pcr.q.out
index 83437e5..ca0b222 100644
--- a/ql/src/test/results/clientpositive/spark/pcr.q.out
+++ b/ql/src/test/results/clientpositive/spark/pcr.q.out
@@ -1502,11 +1502,6 @@ PREHOOK: query: explain extended select key, value from pcr_t1 where (ds='2000-0
PREHOOK: type: QUERY
POSTHOOK: query: explain extended select key, value from pcr_t1 where (ds='2000-04-08' or ds='2000-04-09') and key=14 order by key, value
POSTHOOK: type: QUERY
-OPTIMIZED SQL: SELECT CAST(14 AS INTEGER) AS `key`, `value`
-FROM (SELECT `value`
-FROM `default`.`pcr_t1`
-WHERE (`ds` = '2000-04-08' OR `ds` = '2000-04-09') AND `key` = 14
-ORDER BY `value`) AS `t1`
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -1522,7 +1517,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: pcr_t1
- filterExpr: (((ds = '2000-04-08') or (ds = '2000-04-09')) and (key = 14)) (type: boolean)
+ filterExpr: ((ds) IN ('2000-04-08', '2000-04-09') and (key = 14)) (type: boolean)
Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
@@ -1696,10 +1691,6 @@ PREHOOK: query: explain extended select key, value from pcr_t1 where ds='2000-04
PREHOOK: type: QUERY
POSTHOOK: query: explain extended select key, value from pcr_t1 where ds='2000-04-08' or ds='2000-04-09' order by key, value
POSTHOOK: type: QUERY
-OPTIMIZED SQL: SELECT `key`, `value`
-FROM `default`.`pcr_t1`
-WHERE `ds` = '2000-04-08' OR `ds` = '2000-04-09'
-ORDER BY `key`, `value`
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -1715,7 +1706,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: pcr_t1
- filterExpr: ((ds = '2000-04-08') or (ds = '2000-04-09')) (type: boolean)
+ filterExpr: (ds) IN ('2000-04-08', '2000-04-09') (type: boolean)
Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Select Operator
@@ -2225,10 +2216,6 @@ PREHOOK: query: explain extended select key, value, ds from pcr_t1 where (ds='20
PREHOOK: type: QUERY
POSTHOOK: query: explain extended select key, value, ds from pcr_t1 where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2) order by key, value, ds
POSTHOOK: type: QUERY
-OPTIMIZED SQL: SELECT `key`, `value`, `ds`
-FROM `default`.`pcr_t1`
-WHERE `ds` = '2000-04-08' AND `key` = 1 OR `ds` = '2000-04-09' AND `key` = 2
-ORDER BY `key`, `value`, `ds`
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -2244,22 +2231,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: pcr_t1
- filterExpr: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) (type: boolean)
+ filterExpr: ((struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) and (struct(ds)) IN (struct('2000-04-08'), struct('2000-04-09'))) (type: boolean)
Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) (type: boolean)
- Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+ predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: int), value (type: string), ds (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
null sort order: aaa
sort order: +++
- Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
tag: -1
auto parallelism: false
Execution mode: vectorized
@@ -2372,13 +2359,13 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -4289,11 +4276,6 @@ PREHOOK: query: explain extended select key, value, ds, hr from srcpart where ds
PREHOOK: type: QUERY
POSTHOOK: query: explain extended select key, value, ds, hr from srcpart where ds='2008-04-08' and (hr='11' or hr='12') and key=11 order by key, ds, hr
POSTHOOK: type: QUERY
-OPTIMIZED SQL: SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds`, `hr`
-FROM (SELECT `key`, `value`, `hr`
-FROM `default`.`srcpart`
-WHERE `ds` = '2008-04-08' AND (`hr` = '11' OR `hr` = '12') AND `key` = 11
-ORDER BY `key`, `hr`) AS `t1`
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -4309,7 +4291,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ((ds = '2008-04-08') and ((hr = '11') or (hr = '12')) and (UDFToDouble(key) = 11.0D)) (type: boolean)
+ filterExpr: ((hr) IN ('11', '12') and (ds = '2008-04-08') and (UDFToDouble(key) = 11.0D)) (type: boolean)
Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/spark/ppd_transform.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/ppd_transform.q.out b/ql/src/test/results/clientpositive/spark/ppd_transform.q.out
index 4dfc0fe..e635ba2 100644
--- a/ql/src/test/results/clientpositive/spark/ppd_transform.q.out
+++ b/ql/src/test/results/clientpositive/spark/ppd_transform.q.out
@@ -399,7 +399,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((_col0 = 'a') or (_col0 = 'b')) (type: boolean)
+ predicate: (_col0) IN ('a', 'b') (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
@@ -409,7 +409,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Filter Operator
- predicate: ((_col0 = 'c') or (_col0 = 'd')) (type: boolean)
+ predicate: (_col0) IN ('c', 'd') (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out
index 2420252..ffacb2c 100644
--- a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out
+++ b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out
@@ -6354,10 +6354,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date_hour_n1
- filterExpr: (((date = '2008-04-08') or (date = '2008-04-09')) and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean)
+ filterExpr: ((date) IN ('2008-04-08', '2008-04-09') and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean)
Statistics: Num rows: 4 Data size: 108 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((date = '2008-04-08') or (date = '2008-04-09')) and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean)
+ predicate: ((UDFToDouble(hour) = 11.0D) and (date) IN ('2008-04-08', '2008-04-09') and ds is not null and hr is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string), hr (type: string)
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_2.q.out b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_2.q.out
index 4606a0a..2aad190 100644
--- a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_2.q.out
+++ b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_2.q.out
@@ -165,11 +165,11 @@ STAGE PLANS:
Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((label) IN ('foo', 'bar') and id is not null) (type: boolean)
- Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: id (type: int), label (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
0 _col1 (type: int)
@@ -177,15 +177,15 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Group By Operator
keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Spark Partition Pruning Sink Operator
Target Columns: [Map 1 -> [dim_shops_id:int (dim_shops_id)]]
- Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Local Work:
Map Reduce Local Work
@@ -331,11 +331,11 @@ STAGE PLANS:
Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((label) IN ('foo', 'bar') and id is not null) (type: boolean)
- Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: id (type: int), label (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
0 _col1 (type: int)
@@ -705,11 +705,11 @@ STAGE PLANS:
Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((label) IN ('foo', 'bar') and id is not null) (type: boolean)
- Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: id (type: int), label (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
0 _col1 (type: int)
@@ -717,15 +717,15 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Group By Operator
keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Spark Partition Pruning Sink Operator
Target Columns: [Map 1 -> [dim_shops_id:int (dim_shops_id)]]
- Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Local Work:
Map Reduce Local Work
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out b/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out
index 736321b..00f5d7e 100644
--- a/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out
@@ -1598,7 +1598,7 @@ Stage-0
Select Operator [SEL_2]
Output:["_col0"]
Filter Operator [FIL_4]
- predicate:((c_int = -6) or (c_int = 6))
+ predicate:(c_int) IN (-6, 6)
TableScan [TS_0]
Output:["key","c_int"]
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/spark/vector_between_in.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vector_between_in.q.out b/ql/src/test/results/clientpositive/spark/vector_between_in.q.out
index 8b1a2be..afb76b0 100644
--- a/ql/src/test/results/clientpositive/spark/vector_between_in.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_between_in.q.out
@@ -57,7 +57,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterLongColumnInList(col 3:date, values [-67, -171])
predicate: (cdate) IN (DATE'1969-10-26', DATE'1969-07-14') (type: boolean)
- Statistics: Num rows: 6145 Data size: 1233908 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cdate (type: date)
outputColumnNames: _col0
@@ -65,7 +65,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [3]
- Statistics: Num rows: 6145 Data size: 1233908 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: date)
sort order: +
@@ -73,7 +73,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 6145 Data size: 1233908 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -100,13 +100,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0]
- Statistics: Num rows: 6145 Data size: 1233908 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 6145 Data size: 1233908 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -141,7 +141,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_date_test
- filterExpr: (not (cdate) IN (DATE'1969-10-26', DATE'1969-07-14', DATE'1970-01-21')) (type: boolean)
+ filterExpr: ((cdate <> DATE'1969-10-26') and (cdate <> DATE'1969-07-14') and (cdate <> DATE'1970-01-21')) (type: boolean)
Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
@@ -149,15 +149,15 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: SelectColumnIsFalse(col 5:boolean)(children: LongColumnInList(col 3, values [-67, -171, 20]) -> 5:boolean)
- predicate: (not (cdate) IN (DATE'1969-10-26', DATE'1969-07-14', DATE'1970-01-21')) (type: boolean)
- Statistics: Num rows: 6144 Data size: 1233707 Basic stats: COMPLETE Column stats: NONE
+ predicateExpression: FilterExprAndExpr(children: FilterDateColNotEqualDateScalar(col 3:date, val -67), FilterDateColNotEqualDateScalar(col 3:date, val -171), FilterDateColNotEqualDateScalar(col 3:date, val 20))
+ predicate: ((cdate <> DATE'1969-07-14') and (cdate <> DATE'1969-10-26') and (cdate <> DATE'1970-01-21')) (type: boolean)
+ Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
Select Operator
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: []
- Statistics: Num rows: 6144 Data size: 1233707 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
Group By Vectorization:
@@ -259,7 +259,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterDecimalColumnInList(col 1:decimal(20,10), values [2365.8945945946, 881.0135135135, -3367.6517567568])
predicate: (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568) (type: boolean)
- Statistics: Num rows: 6145 Data size: 1233908 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cdecimal1 (type: decimal(20,10))
outputColumnNames: _col0
@@ -267,7 +267,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [1]
- Statistics: Num rows: 6145 Data size: 1233908 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: decimal(20,10))
sort order: +
@@ -275,7 +275,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 6145 Data size: 1233908 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -302,13 +302,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0]
- Statistics: Num rows: 6145 Data size: 1233908 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 6145 Data size: 1233908 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -343,7 +343,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_date_test
- filterExpr: (not (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568)) (type: boolean)
+ filterExpr: ((cdecimal1 <> 2365.8945945946) and (cdecimal1 <> 881.0135135135) and (cdecimal1 <> -3367.6517567568)) (type: boolean)
Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
@@ -351,15 +351,15 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: SelectColumnIsFalse(col 5:boolean)(children: DecimalColumnInList(col 1:decimal(20,10), values [2365.8945945946, 881.0135135135, -3367.6517567568]) -> 5:boolean)
- predicate: (not (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568)) (type: boolean)
- Statistics: Num rows: 6144 Data size: 1233707 Basic stats: COMPLETE Column stats: NONE
+ predicateExpression: FilterExprAndExpr(children: FilterDecimalColNotEqualDecimalScalar(col 1:decimal(20,10), val 2365.8945945946), FilterDecimalColNotEqualDecimalScalar(col 1:decimal(20,10), val 881.0135135135), FilterDecimalColNotEqualDecimalScalar(col 1:decimal(20,10), val -3367.6517567568))
+ predicate: ((cdecimal1 <> -3367.6517567568) and (cdecimal1 <> 2365.8945945946) and (cdecimal1 <> 881.0135135135)) (type: boolean)
+ Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
Select Operator
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: []
- Statistics: Num rows: 6144 Data size: 1233707 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
Group By Vectorization:
@@ -1091,13 +1091,13 @@ STAGE PLANS:
TableScan Vectorization:
native: true
Select Operator
- expressions: (cdate) IN (DATE'1969-10-26', DATE'1969-07-14') (type: boolean)
+ expressions: ((cdate = DATE'1969-10-26') or (cdate = DATE'1969-07-14')) (type: boolean)
outputColumnNames: _col0
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [5]
- selectExpressions: LongColumnInList(col 3, values [-67, -171]) -> 5:boolean
+ projectedOutputColumnNums: [7]
+ selectExpressions: ColOrCol(col 5:boolean, col 6:boolean)(children: DateColEqualDateScalar(col 3:date, date 1969-10-26) -> 5:boolean, DateColEqualDateScalar(col 3:date, date 1969-07-14) -> 6:boolean) -> 7:boolean
Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
@@ -1105,7 +1105,7 @@ STAGE PLANS:
aggregators: VectorUDAFCountStar(*) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
- keyExpressions: col 5:boolean
+ keyExpressions: col 7:boolean
native: false
vectorProcessingMode: HASH
projectedOutputColumnNums: [0]
@@ -1227,13 +1227,13 @@ STAGE PLANS:
TableScan Vectorization:
native: true
Select Operator
- expressions: (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568) (type: boolean)
+ expressions: ((cdecimal1 = 2365.8945945946) or (cdecimal1 = 881.0135135135) or (cdecimal1 = -3367.6517567568)) (type: boolean)
outputColumnNames: _col0
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [5]
- selectExpressions: DecimalColumnInList(col 1:decimal(20,10), values [2365.8945945946, 881.0135135135, -3367.6517567568]) -> 5:boolean
+ projectedOutputColumnNums: [8]
+ selectExpressions: VectorUDFAdaptor(((cdecimal1 = 2365.8945945946) or (cdecimal1 = 881.0135135135) or (cdecimal1 = -3367.6517567568)))(children: DecimalColEqualDecimalScalar(col 1:decimal(20,10), val 2365.8945945946) -> 5:boolean, DecimalColEqualDecimalScalar(col 1:decimal(20,10), val 881.0135135135) -> 6:boolean, DecimalColEqualDecimalScalar(col 1:decimal(20,10), val -3367.6517567568) -> 7:boolean) -> 8:boolean
Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
@@ -1241,7 +1241,7 @@ STAGE PLANS:
aggregators: VectorUDAFCountStar(*) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
- keyExpressions: col 5:boolean
+ keyExpressions: col 8:boolean
native: false
vectorProcessingMode: HASH
projectedOutputColumnNums: [0]
@@ -1267,7 +1267,7 @@ STAGE PLANS:
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: false
- usesVectorUDFAdaptor: false
+ usesVectorUDFAdaptor: true
vectorized: true
Reducer 2
Execution mode: vectorized
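The hunks above show the planner turning `not (cdecimal1) IN (...)` into a conjunction of `<>` comparisons, so each inequality maps to a native `Filter...NotEqual...Scalar` expression instead of a `SelectColumnIsFalse` wrapper. A minimal, hypothetical sketch of that De Morgan rewrite (the helper name is invented; Hive's actual rule also reorders operands, as the predicate text above shows):

```python
# Hypothetical sketch of the NOT-IN rewrite shown above. By De Morgan,
# NOT (col IN (v1, ..., vn)) == (col <> v1) AND ... AND (col <> vn).
def rewrite_not_in(col, values):
    return " and ".join(f"({col} <> {v})" for v in values)

print(rewrite_not_in(
    "cdecimal1",
    ["2365.8945945946", "881.0135135135", "-3367.6517567568"]))
# (cdecimal1 <> 2365.8945945946) and (cdecimal1 <> 881.0135135135) and (cdecimal1 <> -3367.6517567568)
```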
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/spark/vectorization_0.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_0.q.out b/ql/src/test/results/clientpositive/spark/vectorization_0.q.out
index c6b204f..f1ce726 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_0.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_0.q.out
@@ -30726,17 +30726,17 @@ STAGE PLANS:
Filter Operator
isSamplingPred: false
predicate: (struct(cint,cfloat)) IN (const struct(49,3.5), const struct(47,2.09), const struct(45,3.02)) (type: boolean)
- Statistics: Num rows: 3072 Data size: 726998 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9216 Data size: 2180995 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 3072 Data size: 726998 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9216 Data size: 2180995 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 3072 Data size: 726998 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9216 Data size: 2180995 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -30963,19 +30963,19 @@ STAGE PLANS:
Filter Operator
isSamplingPred: false
predicate: (cstring1) IN ('biology', 'history', 'topology') (type: boolean)
- Statistics: Num rows: 6144 Data size: 1453997 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
keys: cstring1 (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6144 Data size: 1453997 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
null sort order: a
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 6144 Data size: 1453997 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE
tag: -1
value expressions: _col1 (type: bigint)
auto parallelism: false
@@ -31042,16 +31042,16 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3072 Data size: 726998 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 1453997 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: bigint), _col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3072 Data size: 726998 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 1453997 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col1 (type: string)
null sort order: a
sort order: +
- Statistics: Num rows: 3072 Data size: 726998 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 1453997 Basic stats: COMPLETE Column stats: NONE
tag: -1
value expressions: _col0 (type: bigint)
auto parallelism: false
@@ -31062,13 +31062,13 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3072 Data size: 726998 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 1453997 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 3072 Data size: 726998 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 1453997 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
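With `Column stats: NONE`, the old estimator halved the row count when it hit an IN filter (12288 -> 6144 above); after HIVE-19097 the estimate stays at the full row count when no column statistics are available. An illustrative toy model, not Hive's actual code:

```python
# Toy model of the row-estimate change visible in the hunks above:
# without column stats, the old behavior halved rows per IN predicate,
# the patched behavior leaves the estimate unchanged.
def filter_estimate(total_rows, halve_without_stats):
    return total_rows // 2 if halve_without_stats else total_rows

assert filter_estimate(12288, True) == 6144    # old estimate
assert filter_estimate(12288, False) == 12288  # new estimate
```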
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
index 0bf2a4b..8296a65 100644
--- a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
@@ -51,7 +51,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesorc
- filterExpr: ((csmallint = 418S) or (csmallint = 12205S) or (csmallint = 10583S)) (type: boolean)
+ filterExpr: (csmallint) IN (418S, 12205S, 10583S) (type: boolean)
Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
@@ -60,8 +60,8 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1:smallint, val 418), FilterLongColEqualLongScalar(col 1:smallint, val 12205), FilterLongColEqualLongScalar(col 1:smallint, val 10583))
- predicate: ((csmallint = 10583S) or (csmallint = 12205S) or (csmallint = 418S)) (type: boolean)
+ predicateExpression: FilterLongColumnInList(col 1:smallint, values [418, 12205, 10583])
+ predicate: (csmallint) IN (418S, 12205S, 10583S) (type: boolean)
Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418S)) THEN ('a') WHEN ((csmallint = 12205S)) THEN ('b') ELSE ('c') END (type: string), CASE WHEN ((csmallint = 418S)) THEN ('a') WHEN ((csmallint = 12205S)) THEN ('b') ELSE ('c') END (type: string)
@@ -200,7 +200,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesorc
- filterExpr: ((csmallint = 418S) or (csmallint = 12205S) or (csmallint = 10583S)) (type: boolean)
+ filterExpr: (csmallint) IN (418S, 12205S, 10583S) (type: boolean)
Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
@@ -209,8 +209,8 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1:smallint, val 418), FilterLongColEqualLongScalar(col 1:smallint, val 12205), FilterLongColEqualLongScalar(col 1:smallint, val 10583))
- predicate: ((csmallint = 10583S) or (csmallint = 12205S) or (csmallint = 418S)) (type: boolean)
+ predicateExpression: FilterLongColumnInList(col 1:smallint, values [418, 12205, 10583])
+ predicate: (csmallint) IN (418S, 12205S, 10583S) (type: boolean)
Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418S)) THEN ('a') WHEN ((csmallint = 12205S)) THEN ('b') ELSE (null) END (type: string), CASE WHEN ((csmallint = 418S)) THEN ('a') WHEN ((csmallint = 12205S)) THEN (null) ELSE ('c') END (type: string)
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/stat_estimate_drill.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/stat_estimate_drill.q.out b/ql/src/test/results/clientpositive/stat_estimate_drill.q.out
index 8a008c8..1cf449b 100644
--- a/ql/src/test/results/clientpositive/stat_estimate_drill.q.out
+++ b/ql/src/test/results/clientpositive/stat_estimate_drill.q.out
@@ -148,10 +148,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: t3
- filterExpr: ((a = 1) or (a = 2)) (type: boolean)
+ filterExpr: (a) IN (1, 2) (type: boolean)
Statistics: Num rows: 1000/1000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: ((a = 1) or (a = 2)) (type: boolean)
+ predicate: (a) IN (1, 2) (type: boolean)
Statistics: Num rows: 200/200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: sum(a)
@@ -481,10 +481,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: t3
- filterExpr: (((a = 1) and (b = 2)) or ((a = 2) and (b = 3)) or ((a = 3) and (b = 4))) (type: boolean)
+ filterExpr: (struct(a,b)) IN (const struct(1,2), const struct(2,3), const struct(3,4)) (type: boolean)
Statistics: Num rows: 1000/1000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (((a = 1) and (b = 2)) or ((a = 2) and (b = 3)) or ((a = 3) and (b = 4))) (type: boolean)
+ predicate: (struct(a,b)) IN (const struct(1,2), const struct(2,3), const struct(3,4)) (type: boolean)
Statistics: Num rows: 30/30 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: sum(a)
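With `Column stats: COMPLETE`, the estimates above are NDV-driven. The 1000 -> 200 and 1000 -> 30 figures are consistent with the textbook formula rows * len(values) / NDV, assuming an NDV of 10 for each of `a` and `b` (an assumption; the NDVs themselves are not printed in the plan, and Hive's real estimator is more involved):

```python
# Textbook IN-list selectivity: rows * len(values) / ndv. The NDV of 10
# per column is an assumption chosen to match the plan's estimates.
def in_list_rows(total_rows, num_values, ndv):
    return total_rows * num_values // ndv

assert in_list_rows(1000, 2, 10) == 200      # (a) IN (1, 2)
assert in_list_rows(1000, 3, 10 * 10) == 30  # (struct(a,b)) IN (3 tuples)
```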
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out
index 5a50431..e9d99c5 100644
--- a/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out
@@ -303,7 +303,7 @@ Stage-3
Select Operator [SEL_2] (rows=2/2 width=302)
Output:["_col0","_col1","_col3"]
Filter Operator [FIL_9] (rows=2/2 width=226)
- predicate:((de = 109.23) or (de = 119.23))
+ predicate:(de) IN (109.23, 119.23)
TableScan [TS_0] (rows=4/4 width=226)
default@acid_uami_n2,acid_uami_n2, ACID table,Tbl:COMPLETE,Col:COMPLETE,Output:["i","de","vc"]
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/vector_date_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_date_1.q.out b/ql/src/test/results/clientpositive/vector_date_1.q.out
index cb952ec..6e997bc 100644
--- a/ql/src/test/results/clientpositive/vector_date_1.q.out
+++ b/ql/src/test/results/clientpositive/vector_date_1.q.out
@@ -923,7 +923,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterLongColumnInList(col 0:date, values [0, 11323])
predicate: (dt1) IN (DATE'1970-01-01', DATE'2001-01-01') (type: boolean)
- Statistics: Num rows: 2 Data size: 149 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: dt1 (type: date)
outputColumnNames: _col0
@@ -931,13 +931,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0]
- Statistics: Num rows: 2 Data size: 149 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 2 Data size: 149 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
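The vectorized date filters encode their literals as days since the Unix epoch: `values [0, 11323]` above corresponds to `DATE'1970-01-01'` and `DATE'2001-01-01'`, and the negative values in the earlier hunks are pre-1970 dates. A quick check of the encoding:

```python
from datetime import date

# Dates in the vectorized IN lists are days since 1970-01-01; these match
# the literal values printed in the plans above.
def epoch_days(d):
    return (d - date(1970, 1, 1)).days

assert epoch_days(date(1970, 1, 1)) == 0
assert epoch_days(date(2001, 1, 1)) == 11323
assert epoch_days(date(1969, 10, 26)) == -67
assert epoch_days(date(1969, 7, 14)) == -171
assert epoch_days(date(1970, 1, 21)) == 20
```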
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/vector_non_constant_in_expr.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_non_constant_in_expr.q.out b/ql/src/test/results/clientpositive/vector_non_constant_in_expr.q.out
index 966edad..7ddfda1 100644
--- a/ql/src/test/results/clientpositive/vector_non_constant_in_expr.q.out
+++ b/ql/src/test/results/clientpositive/vector_non_constant_in_expr.q.out
@@ -16,28 +16,32 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesorc
- filterExpr: (cint) IN (UDFToInteger(ctinyint), UDFToInteger(cbigint)) (type: boolean)
+ filterExpr: ((cint = UDFToInteger(ctinyint)) or (cint = UDFToInteger(cbigint))) (type: boolean)
Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (cint) IN (UDFToInteger(ctinyint), UDFToInteger(cbigint)) (type: boolean)
- Statistics: Num rows: 6144 Data size: 1453997 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((cint = UDFToInteger(cbigint)) or (cint = UDFToInteger(ctinyint))) (type: boolean)
+ Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 6144 Data size: 1453997 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 6144 Data size: 1453997 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- notVectorizedReason: FILTER operator: Vectorizing IN expression only supported for constant values
- vectorized: false
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Stage: Stage-0
Fetch Operator
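`vector_non_constant_in_expr` shows the opposite direction: an IN whose list contains column expressions cannot use the constant in-list vector filter, so it is rewritten to OR-of-equals, each side of which vectorizes natively (note the `notVectorizedReason` disappearing above). A hypothetical sketch of that rewrite, producing the same predicate text as the new plan:

```python
# Hypothetical sketch of the non-constant IN rewrite seen above:
# col IN (e1, e2) == (col = e1) or (col = e2), which vectorizes even
# though the in-list filter requires constant values.
def rewrite_nonconst_in(col, exprs):
    return " or ".join(f"({col} = {e})" for e in exprs)

assert rewrite_nonconst_in(
    "cint", ["UDFToInteger(ctinyint)", "UDFToInteger(cbigint)"]
) == "(cint = UDFToInteger(ctinyint)) or (cint = UDFToInteger(cbigint))"
```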
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/vector_struct_in.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_struct_in.q.out b/ql/src/test/results/clientpositive/vector_struct_in.q.out
index d073ec6..110f01b 100644
--- a/ql/src/test/results/clientpositive/vector_struct_in.q.out
+++ b/ql/src/test/results/clientpositive/vector_struct_in.q.out
@@ -68,7 +68,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterStructColumnInList(structExpressions [col 0:string, col 1:string], fieldVectorColumnTypes [BYTES, BYTES], structColumnMap [0, 1])
predicate: (struct(id,lineid)) IN (const struct('two','3'), const struct('three','1'), const struct('one','1'), const struct('five','2'), const struct('six','1'), const struct('eight','1'), const struct('seven','1'), const struct('nine','1'), const struct('ten','1')) (type: boolean)
- Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: id (type: string), lineid (type: string)
outputColumnNames: _col0, _col1
@@ -76,13 +76,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -319,7 +319,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterStructColumnInList(structExpressions [col 0:int, col 1:int], fieldVectorColumnTypes [LONG, LONG], structColumnMap [0, 1])
predicate: (struct(id,lineid)) IN (const struct(2,3), const struct(3,1), const struct(1,1), const struct(5,2), const struct(6,1), const struct(8,1), const struct(7,1), const struct(9,1), const struct(10,1)) (type: boolean)
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: id (type: int), lineid (type: int)
outputColumnNames: _col0, _col1
@@ -327,13 +327,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -570,7 +570,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterStructColumnInList(structExpressions [col 0:string, col 1:int], fieldVectorColumnTypes [BYTES, LONG], structColumnMap [0, 1])
predicate: (struct(id,lineid)) IN (const struct('two',3), const struct('three',1), const struct('one',1), const struct('five',2), const struct('six',1), const struct('eight',1), const struct('seven',1), const struct('nine',1), const struct('ten',1)) (type: boolean)
- Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: id (type: string), lineid (type: int)
outputColumnNames: _col0, _col1
@@ -578,13 +578,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -824,7 +824,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterStructColumnInList(structExpressions [col 0:bigint, col 1:string, col 2:double], fieldVectorColumnTypes [LONG, BYTES, DOUBLE], structColumnMap [0, 1, 2])
predicate: (struct(my_bigint,my_string,my_double)) IN (const struct(1L,'a',1.5D), const struct(1L,'b',-0.5D), const struct(3L,'b',1.5D), const struct(1L,'d',1.5D), const struct(1L,'c',1.5D), const struct(1L,'b',2.5D), const struct(1L,'b',0.5D), const struct(5L,'b',1.5D), const struct(1L,'a',0.5D), const struct(3L,'b',1.5D)) (type: boolean)
- Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: my_bigint (type: bigint), my_string (type: string), my_double (type: double)
outputColumnNames: _col0, _col1, _col2
@@ -832,13 +832,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2]
- Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
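`FilterStructColumnInList` above compares a tuple of columns against a set of constant tuples. A row-at-a-time sketch of the semantics (the real operator evaluates whole column vectors per batch, using the `structExpressions` and `structColumnMap` shown in the plan):

```python
# Row-at-a-time sketch of FilterStructColumnInList semantics; the real
# vectorized operator works on entire column batches at once.
def filter_struct_in(rows, cols, in_list):
    allowed = set(in_list)
    return [r for r in rows if tuple(r[c] for c in cols) in allowed]

rows = [{"id": "two", "lineid": "3"}, {"id": "four", "lineid": "2"}]
kept = filter_struct_in(rows, ("id", "lineid"), [("two", "3"), ("three", "1")])
assert kept == [{"id": "two", "lineid": "3"}]
```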
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/vectorized_case.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vectorized_case.q.out b/ql/src/test/results/clientpositive/vectorized_case.q.out
index 828131f..3262fdc 100644
--- a/ql/src/test/results/clientpositive/vectorized_case.q.out
+++ b/ql/src/test/results/clientpositive/vectorized_case.q.out
@@ -48,7 +48,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesorc
- filterExpr: ((csmallint = 418S) or (csmallint = 12205S) or (csmallint = 10583S)) (type: boolean)
+ filterExpr: (csmallint) IN (418S, 12205S, 10583S) (type: boolean)
Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
@@ -57,8 +57,8 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1:smallint, val 418), FilterLongColEqualLongScalar(col 1:smallint, val 12205), FilterLongColEqualLongScalar(col 1:smallint, val 10583))
- predicate: ((csmallint = 10583S) or (csmallint = 12205S) or (csmallint = 418S)) (type: boolean)
+ predicateExpression: FilterLongColumnInList(col 1:smallint, values [418, 12205, 10583])
+ predicate: (csmallint) IN (418S, 12205S, 10583S) (type: boolean)
Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418S)) THEN ('a') WHEN ((csmallint = 12205S)) THEN ('b') ELSE ('c') END (type: string), CASE WHEN ((csmallint = 418S)) THEN ('a') WHEN ((csmallint = 12205S)) THEN ('b') ELSE ('c') END (type: string)
@@ -194,7 +194,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesorc
- filterExpr: ((csmallint = 418S) or (csmallint = 12205S) or (csmallint = 10583S)) (type: boolean)
+ filterExpr: (csmallint) IN (418S, 12205S, 10583S) (type: boolean)
Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
@@ -203,8 +203,8 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1:smallint, val 418), FilterLongColEqualLongScalar(col 1:smallint, val 12205), FilterLongColEqualLongScalar(col 1:smallint, val 10583))
- predicate: ((csmallint = 10583S) or (csmallint = 12205S) or (csmallint = 418S)) (type: boolean)
+ predicateExpression: FilterLongColumnInList(col 1:smallint, values [418, 12205, 10583])
+ predicate: (csmallint) IN (418S, 12205S, 10583S) (type: boolean)
Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418S)) THEN ('a') WHEN ((csmallint = 12205S)) THEN ('b') ELSE (null) END (type: string), CASE WHEN ((csmallint = 418S)) THEN ('a') WHEN ((csmallint = 12205S)) THEN (null) ELSE ('c') END (type: string)
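The `vectorized_case` hunks go the other way: an OR chain of scalar equality filters collapses into a single `FilterLongColumnInList`, so each batch value needs one set-membership test instead of up to three comparisons. A sketch of that idea:

```python
# Sketch of the FilterLongColumnInList idea: one hash-set membership test
# per value replaces a chain of scalar equality filters.
def filter_long_col_in_list(column, values):
    allowed = set(values)
    return [v for v in column if v in allowed]

assert filter_long_col_in_list(
    [418, 5, 12205, 7, 10583], [418, 12205, 10583]
) == [418, 12205, 10583]
```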
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/vectorized_timestamp.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vectorized_timestamp.q.out b/ql/src/test/results/clientpositive/vectorized_timestamp.q.out
index be18919..da869ce 100644
--- a/ql/src/test/results/clientpositive/vectorized_timestamp.q.out
+++ b/ql/src/test/results/clientpositive/vectorized_timestamp.q.out
@@ -233,7 +233,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterTimestampColumnInList(col 0:timestamp, values [0001-01-02 16:00:00.0, 0002-02-03 16:00:00.0])
predicate: (ts) IN (TIMESTAMP'0001-01-01 00:00:00', TIMESTAMP'0002-02-02 00:00:00') (type: boolean)
- Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ts (type: timestamp)
outputColumnNames: _col0
@@ -241,13 +241,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0]
- Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/tez/query13.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query13.q.out b/ql/src/test/results/clientpositive/perf/tez/query13.q.out
index 5cd4e27..6274d2a 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query13.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query13.q.out
@@ -101,18 +101,18 @@ POSTHOOK: type: QUERY
Plan optimized by CBO.
Vertex dependency in root stage
-Map 9 <- Reducer 11 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE)
+Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE)
Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE)
Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE)
Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE)
Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE)
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE)
Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
Reducer 5 <- Map 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
Reducer 6 <- Map 16 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
-Reducer 8 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE)
Stage-0
Fetch Operator
@@ -127,142 +127,140 @@ Stage-0
<-Reducer 6 [CUSTOM_SIMPLE_EDGE]
PARTITION_ONLY_SHUFFLE [RS_37]
Group By Operator [GBY_36] (rows=1 width=256)
- Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col6)","count(_col6)","sum(_col8)","count(_col8)","sum(_col9)","count(_col9)"]
- Select Operator [SEL_35] (rows=715776 width=88)
- Output:["_col6","_col8","_col9"]
- Filter Operator [FIL_34] (rows=715776 width=88)
- predicate:(((_col19 = 'D') and (_col20 = 'Primary') and _col7 BETWEEN 50 AND 100 and (_col14 = 1)) or ((_col19 = 'M') and (_col20 = '4 yr Degree') and _col7 BETWEEN 100 AND 150 and (_col14 = 3)) or ((_col19 = 'U') and (_col20 = 'Advanced Degree') and _col7 BETWEEN 150 AND 200 and (_col14 = 1)))
- Merge Join Operator [MERGEJOIN_121] (rows=17178642 width=88)
- Conds:RS_31._col2=RS_156._col0(Inner),Output:["_col6","_col7","_col8","_col9","_col14","_col19","_col20"]
- <-Map 16 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_156]
- PartitionCols:_col0
- Select Operator [SEL_155] (rows=1861800 width=385)
- Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_154] (rows=1861800 width=385)
- predicate:(((cd_education_status = '4 yr Degree') or (cd_education_status = 'Primary') or (cd_education_status = 'Advanced Degree')) and ((cd_marital_status = 'M') or (cd_marital_status = 'D') or (cd_marital_status = 'U')) and cd_demo_sk is not null)
- TableScan [TS_15] (rows=1861800 width=385)
- default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"]
- <-Reducer 5 [SIMPLE_EDGE]
- SHUFFLE [RS_31]
- PartitionCols:_col2
- Filter Operator [FIL_30] (rows=15616947 width=88)
- predicate:(((_col16) IN ('KY', 'GA', 'NM') and _col10 BETWEEN 100 AND 200) or ((_col16) IN ('MT', 'OR', 'IN') and _col10 BETWEEN 150 AND 300) or ((_col16) IN ('WI', 'MO', 'WV') and _col10 BETWEEN 50 AND 250))
- Merge Join Operator [MERGEJOIN_120] (rows=93701693 width=88)
- Conds:RS_27._col4=RS_148._col0(Inner),Output:["_col2","_col6","_col7","_col8","_col9","_col10","_col14","_col16"]
- <-Map 14 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_148]
- PartitionCols:_col0
- Select Operator [SEL_147] (rows=10000000 width=1014)
- Output:["_col0","_col1"]
- Filter Operator [FIL_146] (rows=10000000 width=1014)
- predicate:((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null)
- TableScan [TS_12] (rows=40000000 width=1014)
- default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"]
- <-Reducer 4 [SIMPLE_EDGE]
- SHUFFLE [RS_27]
- PartitionCols:_col4
- Merge Join Operator [MERGEJOIN_119] (rows=85183356 width=88)
- Conds:RS_24._col3=RS_140._col0(Inner),Output:["_col2","_col4","_col6","_col7","_col8","_col9","_col10","_col14"]
- <-Map 12 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_140]
- PartitionCols:_col0
- Select Operator [SEL_139] (rows=7200 width=107)
- Output:["_col0","_col1"]
- Filter Operator [FIL_138] (rows=7200 width=107)
- predicate:(((hd_dep_count = 3) or (hd_dep_count = 1)) and hd_demo_sk is not null)
- TableScan [TS_9] (rows=7200 width=107)
- default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count"]
- <-Reducer 3 [SIMPLE_EDGE]
- SHUFFLE [RS_24]
- PartitionCols:_col3
- Merge Join Operator [MERGEJOIN_118] (rows=77439413 width=88)
- Conds:RS_21._col1=RS_132._col0(Inner),Output:["_col2","_col3","_col4","_col6","_col7","_col8","_col9","_col10"]
- <-Map 10 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_132]
- PartitionCols:_col0
- Select Operator [SEL_131] (rows=36524 width=1119)
- Output:["_col0"]
- Filter Operator [FIL_130] (rows=36524 width=1119)
- predicate:((d_year = 2001) and d_date_sk is not null)
- TableScan [TS_6] (rows=73049 width=1119)
- default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"]
- <-Reducer 2 [SIMPLE_EDGE]
- SHUFFLE [RS_21]
- PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_117] (rows=70399465 width=88)
- Conds:RS_124._col0=RS_164._col4(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col8","_col9","_col10"]
- <-Map 1 [SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_124]
- PartitionCols:_col0
- Select Operator [SEL_123] (rows=1704 width=1910)
- Output:["_col0"]
- Filter Operator [FIL_122] (rows=1704 width=1910)
- predicate:s_store_sk is not null
- TableScan [TS_0] (rows=1704 width=1910)
- default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk"]
- <-Map 9 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_164]
- PartitionCols:_col4
- Select Operator [SEL_163] (rows=63999513 width=88)
- Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"]
- Filter Operator [FIL_162] (rows=63999513 width=88)
- predicate:((ss_addr_sk BETWEEN DynamicValue(RS_28_customer_address_ca_address_sk_min) AND DynamicValue(RS_28_customer_address_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_28_customer_address_ca_address_sk_bloom_filter))) and (ss_cdemo_sk BETWEEN DynamicValue(RS_32_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_32_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_32_customer_demographics_cd_demo_sk_bloom_filter))) and (ss_hdemo_sk BETWEEN DynamicValue(RS_25_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_25_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_25_household_demographics_hd_demo_sk_bloom_filter))) and (ss_net_profit BETWEEN 100 AND 200 or ss_net_profit BETWEEN 150 AND 300 or ss_net_profit BETWEEN 50 AND 250) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_sold_date_sk BETWEEN DynamicValue(RS_22_date_dim_d_date_sk_min) AND DynamicValue(RS_22_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_22_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_18_store_s_store_sk_min) AND DynamicValue(RS_18_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_18_store_s_store_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null)
- TableScan [TS_3] (rows=575995635 width=88)
- default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_ext_sales_price","ss_ext_wholesale_cost","ss_net_profit"]
- <-Reducer 11 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_137]
- Group By Operator [GBY_136] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
- <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized
- SHUFFLE [RS_135]
- Group By Operator [GBY_134] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_133] (rows=36524 width=1119)
- Output:["_col0"]
- Please refer to the previous Select Operator [SEL_131]
- <-Reducer 13 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_145]
- Group By Operator [GBY_144] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
- <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized
- SHUFFLE [RS_143]
- Group By Operator [GBY_142] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_141] (rows=7200 width=107)
- Output:["_col0"]
- Please refer to the previous Select Operator [SEL_139]
- <-Reducer 15 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_153]
- Group By Operator [GBY_152] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=10000000)"]
- <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized
- SHUFFLE [RS_151]
- Group By Operator [GBY_150] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=10000000)"]
- Select Operator [SEL_149] (rows=10000000 width=1014)
- Output:["_col0"]
- Please refer to the previous Select Operator [SEL_147]
- <-Reducer 17 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_161]
- Group By Operator [GBY_160] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1861800)"]
- <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized
- SHUFFLE [RS_159]
- Group By Operator [GBY_158] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1861800)"]
- Select Operator [SEL_157] (rows=1861800 width=385)
- Output:["_col0"]
- Please refer to the previous Select Operator [SEL_155]
- <-Reducer 8 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_129]
- Group By Operator [GBY_128] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
- <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_127]
- Group By Operator [GBY_126] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_125] (rows=1704 width=1910)
- Output:["_col0"]
- Please refer to the previous Select Operator [SEL_123]
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col5)","count(_col5)","sum(_col7)","count(_col7)","sum(_col8)","count(_col8)"]
+ Merge Join Operator [MERGEJOIN_121] (rows=8066665 width=1014)
+ Conds:RS_32._col4=RS_156._col0(Inner),Output:["_col5","_col7","_col8"]
+ <-Map 16 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_156]
+ PartitionCols:_col0
+ Select Operator [SEL_155] (rows=1704 width=1910)
+ Output:["_col0"]
+ Filter Operator [FIL_154] (rows=1704 width=1910)
+ predicate:s_store_sk is not null
+ TableScan [TS_15] (rows=1704 width=1910)
+ default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk"]
+ <-Reducer 5 [SIMPLE_EDGE]
+ SHUFFLE [RS_32]
+ PartitionCols:_col4
+ Filter Operator [FIL_31] (rows=7333332 width=1014)
+ predicate:((((_col18 = 'KY') or (_col18 = 'GA') or (_col18 = 'NM')) and _col9 BETWEEN 100 AND 200) or (((_col18 = 'MT') or (_col18 = 'OR') or (_col18 = 'IN')) and _col9 BETWEEN 150 AND 300) or (((_col18 = 'WI') or (_col18 = 'MO') or (_col18 = 'WV')) and _col9 BETWEEN 50 AND 250))
+ Merge Join Operator [MERGEJOIN_120] (rows=22000000 width=1014)
+ Conds:RS_28._col3=RS_148._col0(Inner),Output:["_col4","_col5","_col7","_col8","_col9","_col18"]
+ <-Map 14 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_148]
+ PartitionCols:_col0
+ Select Operator [SEL_147] (rows=20000000 width=1014)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_146] (rows=20000000 width=1014)
+ predicate:((ca_country = 'United States') and ca_address_sk is not null)
+ TableScan [TS_12] (rows=40000000 width=1014)
+ default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"]
+ <-Reducer 4 [SIMPLE_EDGE]
+ SHUFFLE [RS_28]
+ PartitionCols:_col3
+ Filter Operator [FIL_27] (rows=10647918 width=88)
+ predicate:(((_col13 = 'D') and (_col14 = 'Primary') and _col6 BETWEEN 50 AND 100 and (_col16 = 1)) or ((_col13 = 'M') and (_col14 = '4 yr Degree') and _col6 BETWEEN 100 AND 150 and (_col16 = 3)) or ((_col13 = 'U') and (_col14 = 'Advanced Degree') and _col6 BETWEEN 150 AND 200 and (_col16 = 1)))
+ Merge Join Operator [MERGEJOIN_119] (rows=255550079 width=88)
+ Conds:RS_24._col2=RS_140._col0(Inner),Output:["_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col13","_col14","_col16"]
+ <-Map 12 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_140]
+ PartitionCols:_col0
+ Select Operator [SEL_139] (rows=7200 width=107)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_138] (rows=7200 width=107)
+ predicate:((hd_dep_count) IN (3, 1) and hd_demo_sk is not null)
+ TableScan [TS_9] (rows=7200 width=107)
+ default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count"]
+ <-Reducer 3 [SIMPLE_EDGE]
+ SHUFFLE [RS_24]
+ PartitionCols:_col2
+ Merge Join Operator [MERGEJOIN_118] (rows=232318249 width=88)
+ Conds:RS_21._col1=RS_132._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col13","_col14"]
+ <-Map 10 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_132]
+ PartitionCols:_col0
+ Select Operator [SEL_131] (rows=1861800 width=385)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_130] (rows=1861800 width=385)
+ predicate:((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null)
+ TableScan [TS_6] (rows=1861800 width=385)
+ default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"]
+ <-Reducer 2 [SIMPLE_EDGE]
+ SHUFFLE [RS_21]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_117] (rows=211198404 width=88)
+ Conds:RS_164._col0=RS_124._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"]
+ <-Map 8 [SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_124]
+ PartitionCols:_col0
+ Select Operator [SEL_123] (rows=36524 width=1119)
+ Output:["_col0"]
+ Filter Operator [FIL_122] (rows=36524 width=1119)
+ predicate:((d_year = 2001) and d_date_sk is not null)
+ TableScan [TS_3] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"]
+ <-Map 1 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_164]
+ PartitionCols:_col0
+ Select Operator [SEL_163] (rows=191998545 width=88)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"]
+ Filter Operator [FIL_162] (rows=191998545 width=88)
+ predicate:((ss_addr_sk BETWEEN DynamicValue(RS_29_customer_address_ca_address_sk_min) AND DynamicValue(RS_29_customer_address_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_29_customer_address_ca_address_sk_bloom_filter))) and (ss_cdemo_sk BETWEEN DynamicValue(RS_22_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_22_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_22_customer_demographics_cd_demo_sk_bloom_filter))) and (ss_hdemo_sk BETWEEN DynamicValue(RS_25_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_25_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_25_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_sold_date_sk BETWEEN DynamicValue(RS_19_date_dim_d_date_sk_min) AND DynamicValue(RS_19_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_19_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_33_store_s_store_sk_min) AND DynamicValue(RS_33_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_33_store_s_store_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null)
+ TableScan [TS_0] (rows=575995635 width=88)
+ default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_ext_sales_price","ss_ext_wholesale_cost","ss_net_profit"]
+ <-Reducer 11 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_137]
+ Group By Operator [GBY_136] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1861800)"]
+ <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_135]
+ Group By Operator [GBY_134] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1861800)"]
+ Select Operator [SEL_133] (rows=1861800 width=385)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_131]
+ <-Reducer 13 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_145]
+ Group By Operator [GBY_144] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
+ <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_143]
+ Group By Operator [GBY_142] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
+ Select Operator [SEL_141] (rows=7200 width=107)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_139]
+ <-Reducer 15 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_153]
+ Group By Operator [GBY_152] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=20000000)"]
+ <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_151]
+ Group By Operator [GBY_150] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=20000000)"]
+ Select Operator [SEL_149] (rows=20000000 width=1014)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_147]
+ <-Reducer 17 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_161]
+ Group By Operator [GBY_160] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
+ <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_159]
+ Group By Operator [GBY_158] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
+ Select Operator [SEL_157] (rows=1704 width=1910)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_155]
+ <-Reducer 9 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_129]
+ Group By Operator [GBY_128] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
+ <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_127]
+ Group By Operator [GBY_126] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
+ Select Operator [SEL_125] (rows=36524 width=1119)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_123]
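The row-count changes in the hunks above (e.g. 36525 -> 73049, 30 -> 60) come from HIVE-19097's revised stats estimation for IN predicates. A minimal sketch of the idea, assuming a simple per-value equality selectivity of 1/NDV and a no-reduction fallback when column stats are missing (the function name and formula are illustrative, not Hive's actual StatsRulesProcFactory code):

```python
# Illustrative row estimator for `col IN (v1, ..., vk)`.
# Each value acts like an equality with selectivity 1/NDV, so k values
# select roughly k/NDV of the input rows. With "Col:NONE" (no column
# statistics), a conservative estimator keeps the full input row count
# instead of arbitrarily halving it.

def estimate_in_rows(num_rows, ndv, num_values):
    """Estimated output rows of an IN filter over num_rows input rows."""
    if ndv is None:  # no column statistics available: do not reduce
        return num_rows
    return min(num_rows, round(num_rows * num_values / ndv))
```

Under these assumptions a 3-value IN over date_dim with no stats keeps all 73049 rows, consistent with the updated golden files.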
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/tez/query15.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query15.q.out b/ql/src/test/results/clientpositive/perf/tez/query15.q.out
index 3c7ae66..e1eca99 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query15.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query15.q.out
@@ -71,7 +71,7 @@ Stage-0
Select Operator [SEL_23] (rows=348467716 width=135)
Output:["_col4","_col7"]
Filter Operator [FIL_22] (rows=348467716 width=135)
- predicate:((_col3) IN ('CA', 'WA', 'GA') or (_col7 > 500) or (substr(_col4, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792'))
+ predicate:((_col3 = 'CA') or (_col3 = 'GA') or (_col3 = 'WA') or (_col7 > 500) or (substr(_col4, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792'))
Merge Join Operator [MERGEJOIN_77] (rows=348467716 width=135)
Conds:RS_19._col0=RS_20._col1(Inner),Output:["_col3","_col4","_col7"]
<-Reducer 2 [SIMPLE_EDGE]
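The query15 hunk above also shows the planner now printing a small IN list as a disjunction of equality comparisons. A hypothetical sketch of that rewrite (the function and output formatting are illustrative only, not Hive's internals):

```python
# Expand `col IN (v1, v2, ...)` into `(col = v1) or (col = v2) or ...`,
# sorting the values as the updated plan output does ('CA', 'GA', 'WA').

def expand_in(col, values):
    """Render an IN list as an OR of equality predicates."""
    return " or ".join("({} = '{}')".format(col, v) for v in sorted(values))
```

This kind of normalization lets related equals and IN operators share one selectivity computation instead of being estimated independently.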
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/tez/query16.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query16.q.out b/ql/src/test/results/clientpositive/perf/tez/query16.q.out
index 5652f3b..cbbd1fa 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query16.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query16.q.out
@@ -150,9 +150,9 @@ Stage-0
<-Map 14 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_149]
PartitionCols:_col0
- Select Operator [SEL_148] (rows=30 width=2045)
+ Select Operator [SEL_148] (rows=60 width=2045)
Output:["_col0"]
- Filter Operator [FIL_147] (rows=30 width=2045)
+ Filter Operator [FIL_147] (rows=60 width=2045)
predicate:((cc_county) IN ('Ziebach County', 'Levy County', 'Huron County', 'Franklin Parish', 'Daviess County') and cc_call_center_sk is not null)
TableScan [TS_9] (rows=60 width=2045)
default@call_center,call_center,Tbl:COMPLETE,Col:NONE,Output:["cc_call_center_sk","cc_county"]
@@ -223,7 +223,7 @@ Stage-0
SHUFFLE [RS_152]
Group By Operator [GBY_151] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_150] (rows=30 width=2045)
+ Select Operator [SEL_150] (rows=60 width=2045)
Output:["_col0"]
Please refer to the previous Select Operator [SEL_148]
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/tez/query17.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query17.q.out b/ql/src/test/results/clientpositive/perf/tez/query17.q.out
index e185775..6b35492 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query17.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query17.q.out
@@ -161,9 +161,9 @@ Stage-0
<-Map 8 [SIMPLE_EDGE] vectorized
PARTITION_ONLY_SHUFFLE [RS_224]
PartitionCols:_col0
- Select Operator [SEL_219] (rows=36525 width=1119)
+ Select Operator [SEL_219] (rows=73049 width=1119)
Output:["_col0"]
- Filter Operator [FIL_216] (rows=36525 width=1119)
+ Filter Operator [FIL_216] (rows=73049 width=1119)
predicate:((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null)
TableScan [TS_3] (rows=73049 width=1119)
default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_quarter_name"]
@@ -184,9 +184,9 @@ Stage-0
<-Map 8 [SIMPLE_EDGE] vectorized
PARTITION_ONLY_SHUFFLE [RS_222]
PartitionCols:_col0
- Select Operator [SEL_218] (rows=36525 width=1119)
+ Select Operator [SEL_218] (rows=73049 width=1119)
Output:["_col0"]
- Filter Operator [FIL_215] (rows=36525 width=1119)
+ Filter Operator [FIL_215] (rows=73049 width=1119)
predicate:((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null)
Please refer to the previous TableScan [TS_3]
<-Map 18 [SIMPLE_EDGE] vectorized
@@ -244,7 +244,7 @@ Stage-0
PARTITION_ONLY_SHUFFLE [RS_228]
Group By Operator [GBY_226] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_223] (rows=36525 width=1119)
+ Select Operator [SEL_223] (rows=73049 width=1119)
Output:["_col0"]
Please refer to the previous Select Operator [SEL_218]
<-Reducer 3 [SIMPLE_EDGE]
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/tez/query18.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query18.q.out b/ql/src/test/results/clientpositive/perf/tez/query18.q.out
index 1b9b2fb..da5d3b1 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query18.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query18.q.out
@@ -67,35 +67,34 @@ POSTHOOK: type: QUERY
Plan optimized by CBO.
Vertex dependency in root stage
-Map 10 <- Reducer 15 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 19 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE)
-Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE)
-Reducer 12 <- Map 16 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE)
-Reducer 13 <- Map 18 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE)
-Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE)
-Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE)
-Reducer 19 <- Map 18 (CUSTOM_SIMPLE_EDGE)
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE)
-Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
-Reducer 4 <- Reducer 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
+Map 9 <- Reducer 14 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 18 (BROADCAST_EDGE)
+Reducer 10 <- Map 13 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
+Reducer 11 <- Map 15 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE)
+Reducer 12 <- Map 17 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE)
+Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE)
+Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE)
+Reducer 18 <- Map 17 (CUSTOM_SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
+Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
-Reducer 7 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
Stage-0
Fetch Operator
limit:100
Stage-1
Reducer 6 vectorized
- File Output Operator [FS_189]
- Limit [LIM_188] (rows=100 width=135)
+ File Output Operator [FS_187]
+ Limit [LIM_186] (rows=100 width=135)
Number of rows:100
- Select Operator [SEL_187] (rows=1054114882 width=135)
+ Select Operator [SEL_185] (rows=1054114882 width=135)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"]
<-Reducer 5 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_186]
- Select Operator [SEL_185] (rows=1054114882 width=135)
+ SHUFFLE [RS_184]
+ Select Operator [SEL_183] (rows=1054114882 width=135)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"]
- Group By Operator [GBY_184] (rows=1054114882 width=135)
+ Group By Operator [GBY_182] (rows=1054114882 width=135)
Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","sum(VALUE._col6)","count(VALUE._col7)","sum(VALUE._col8)","count(VALUE._col9)","sum(VALUE._col10)","count(VALUE._col11)","sum(VALUE._col12)","count(VALUE._col13)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4
<-Reducer 4 [SIMPLE_EDGE]
SHUFFLE [RS_43]
@@ -106,51 +105,14 @@ Stage-0
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"]
Merge Join Operator [MERGEJOIN_145] (rows=421645953 width=135)
Conds:RS_37._col0=RS_38._col3(Inner),Output:["_col4","_col6","_col7","_col8","_col11","_col16","_col17","_col18","_col19","_col20","_col26"]
- <-Reducer 3 [SIMPLE_EDGE]
- PARTITION_ONLY_SHUFFLE [RS_37]
- PartitionCols:_col0
- Merge Join Operator [MERGEJOIN_141] (rows=48400001 width=860)
- Conds:RS_34._col1=RS_154._col0(Inner),Output:["_col0","_col4","_col6","_col7","_col8"]
- <-Map 9 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_154]
- PartitionCols:_col0
- Select Operator [SEL_153] (rows=1861800 width=385)
- Output:["_col0"]
- Filter Operator [FIL_152] (rows=1861800 width=385)
- predicate:cd_demo_sk is not null
- TableScan [TS_6] (rows=1861800 width=385)
- default@customer_demographics,cd2,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk"]
- <-Reducer 2 [SIMPLE_EDGE]
- SHUFFLE [RS_34]
- PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_140] (rows=44000000 width=860)
- Conds:RS_148._col2=RS_151._col0(Inner),Output:["_col0","_col1","_col4","_col6","_col7","_col8"]
- <-Map 1 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_148]
- PartitionCols:_col2
- Select Operator [SEL_147] (rows=40000000 width=860)
- Output:["_col0","_col1","_col2","_col4"]
- Filter Operator [FIL_146] (rows=40000000 width=860)
- predicate:((c_birth_month) IN (9, 5, 12, 4, 1, 10) and c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null)
- TableScan [TS_0] (rows=80000000 width=860)
- default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk","c_birth_month","c_birth_year"]
- <-Map 8 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_151]
- PartitionCols:_col0
- Select Operator [SEL_150] (rows=20000000 width=1014)
- Output:["_col0","_col1","_col2","_col3"]
- Filter Operator [FIL_149] (rows=20000000 width=1014)
- predicate:((ca_state) IN ('ND', 'WI', 'AL', 'NC', 'OK', 'MS', 'TN') and ca_address_sk is not null)
- TableScan [TS_3] (rows=40000000 width=1014)
- default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_county","ca_state","ca_country"]
- <-Reducer 13 [SIMPLE_EDGE]
+ <-Reducer 12 [SIMPLE_EDGE]
SHUFFLE [RS_38]
PartitionCols:_col3
Select Operator [SEL_30] (rows=383314495 width=135)
Output:["_col1","_col3","_col6","_col7","_col8","_col9","_col10","_col16"]
Merge Join Operator [MERGEJOIN_144] (rows=383314495 width=135)
Conds:RS_27._col3=RS_173._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col8","_col14","_col16"]
- <-Map 18 [SIMPLE_EDGE] vectorized
+ <-Map 17 [SIMPLE_EDGE] vectorized
PARTITION_ONLY_SHUFFLE [RS_173]
PartitionCols:_col0
Select Operator [SEL_172] (rows=462000 width=1436)
@@ -159,12 +121,12 @@ Stage-0
predicate:i_item_sk is not null
TableScan [TS_18] (rows=462000 width=1436)
default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"]
- <-Reducer 12 [SIMPLE_EDGE]
+ <-Reducer 11 [SIMPLE_EDGE]
SHUFFLE [RS_27]
PartitionCols:_col3
Merge Join Operator [MERGEJOIN_143] (rows=348467716 width=135)
Conds:RS_24._col2=RS_165._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col14"]
- <-Map 16 [SIMPLE_EDGE] vectorized
+ <-Map 15 [SIMPLE_EDGE] vectorized
PARTITION_ONLY_SHUFFLE [RS_165]
PartitionCols:_col0
Select Operator [SEL_164] (rows=465450 width=385)
@@ -173,12 +135,12 @@ Stage-0
predicate:((cd_education_status = 'College') and (cd_gender = 'M') and cd_demo_sk is not null)
TableScan [TS_15] (rows=1861800 width=385)
default@customer_demographics,cd1,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_education_status","cd_dep_count"]
- <-Reducer 11 [SIMPLE_EDGE]
+ <-Reducer 10 [SIMPLE_EDGE]
SHUFFLE [RS_24]
PartitionCols:_col2
Merge Join Operator [MERGEJOIN_142] (rows=316788826 width=135)
- Conds:RS_183._col0=RS_157._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"]
- <-Map 14 [SIMPLE_EDGE] vectorized
+ Conds:RS_181._col0=RS_157._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"]
+ <-Map 13 [SIMPLE_EDGE] vectorized
PARTITION_ONLY_SHUFFLE [RS_157]
PartitionCols:_col0
Select Operator [SEL_156] (rows=36524 width=1119)
@@ -187,57 +149,83 @@ Stage-0
predicate:((d_year = 2001) and d_date_sk is not null)
TableScan [TS_12] (rows=73049 width=1119)
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"]
- <-Map 10 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_183]
+ <-Map 9 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_181]
PartitionCols:_col0
- Select Operator [SEL_182] (rows=287989836 width=135)
+ Select Operator [SEL_180] (rows=287989836 width=135)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"]
- Filter Operator [FIL_181] (rows=287989836 width=135)
- predicate:((cs_bill_cdemo_sk BETWEEN DynamicValue(RS_25_cd1_cd_demo_sk_min) AND DynamicValue(RS_25_cd1_cd_demo_sk_max) and in_bloom_filter(cs_bill_cdemo_sk, DynamicValue(RS_25_cd1_cd_demo_sk_bloom_filter))) and (cs_bill_customer_sk BETWEEN DynamicValue(RS_37_customer_c_customer_sk_min) AND DynamicValue(RS_37_customer_c_customer_sk_max) and in_bloom_filter(cs_bill_customer_sk, DynamicValue(RS_37_customer_c_customer_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_28_item_i_item_sk_min) AND DynamicValue(RS_28_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_28_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_22_date_dim_d_date_sk_min) AND DynamicValue(RS_22_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_22_date_dim_d_date_sk_bloom_filter))) and cs_bill_cdemo_sk is not null and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null)
+ Filter Operator [FIL_179] (rows=287989836 width=135)
+ predicate:((cs_bill_cdemo_sk BETWEEN DynamicValue(RS_25_cd1_cd_demo_sk_min) AND DynamicValue(RS_25_cd1_cd_demo_sk_max) and in_bloom_filter(cs_bill_cdemo_sk, DynamicValue(RS_25_cd1_cd_demo_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_28_item_i_item_sk_min) AND DynamicValue(RS_28_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_28_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_22_date_dim_d_date_sk_min) AND DynamicValue(RS_22_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_22_date_dim_d_date_sk_bloom_filter))) and cs_bill_cdemo_sk is not null and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null)
TableScan [TS_9] (rows=287989836 width=135)
default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_bill_cdemo_sk","cs_item_sk","cs_quantity","cs_list_price","cs_sales_price","cs_coupon_amt","cs_net_profit"]
- <-Reducer 15 [BROADCAST_EDGE] vectorized
+ <-Reducer 14 [BROADCAST_EDGE] vectorized
BROADCAST [RS_162]
Group By Operator [GBY_161] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
- <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized
+ <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized
PARTITION_ONLY_SHUFFLE [RS_160]
Group By Operator [GBY_159] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
Select Operator [SEL_158] (rows=36524 width=1119)
Output:["_col0"]
Please refer to the previous Select Operator [SEL_156]
- <-Reducer 17 [BROADCAST_EDGE] vectorized
+ <-Reducer 16 [BROADCAST_EDGE] vectorized
BROADCAST [RS_170]
Group By Operator [GBY_169] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
- <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized
+ <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized
PARTITION_ONLY_SHUFFLE [RS_168]
Group By Operator [GBY_167] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
Select Operator [SEL_166] (rows=465450 width=385)
Output:["_col0"]
Please refer to the previous Select Operator [SEL_164]
- <-Reducer 19 [BROADCAST_EDGE] vectorized
+ <-Reducer 18 [BROADCAST_EDGE] vectorized
BROADCAST [RS_178]
Group By Operator [GBY_177] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
- <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized
+ <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized
PARTITION_ONLY_SHUFFLE [RS_176]
Group By Operator [GBY_175] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
Select Operator [SEL_174] (rows=462000 width=1436)
Output:["_col0"]
Please refer to the previous Select Operator [SEL_172]
- <-Reducer 7 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_180]
- Group By Operator [GBY_179] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=48400000)"]
- <-Reducer 3 [CUSTOM_SIMPLE_EDGE]
- PARTITION_ONLY_SHUFFLE [RS_126]
- Group By Operator [GBY_125] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=48400000)"]
- Select Operator [SEL_124] (rows=48400001 width=860)
- Output:["_col0"]
- Please refer to the previous Merge Join Operator [MERGEJOIN_141]
+ <-Reducer 3 [SIMPLE_EDGE]
+ SHUFFLE [RS_37]
+ PartitionCols:_col0
+ Merge Join Operator [MERGEJOIN_141] (rows=96800003 width=860)
+ Conds:RS_34._col1=RS_154._col0(Inner),Output:["_col0","_col4","_col6","_col7","_col8"]
+ <-Map 8 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_154]
+ PartitionCols:_col0
+ Select Operator [SEL_153] (rows=1861800 width=385)
+ Output:["_col0"]
+ Filter Operator [FIL_152] (rows=1861800 width=385)
+ predicate:cd_demo_sk is not null
+ TableScan [TS_6] (rows=1861800 width=385)
+ default@customer_demographics,cd2,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk"]
+ <-Reducer 2 [SIMPLE_EDGE]
+ SHUFFLE [RS_34]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_140] (rows=88000001 width=860)
+ Conds:RS_148._col2=RS_151._col0(Inner),Output:["_col0","_col1","_col4","_col6","_col7","_col8"]
+ <-Map 1 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_148]
+ PartitionCols:_col2
+ Select Operator [SEL_147] (rows=80000000 width=860)
+ Output:["_col0","_col1","_col2","_col4"]
+ Filter Operator [FIL_146] (rows=80000000 width=860)
+ predicate:((c_birth_month) IN (9, 5, 12, 4, 1, 10) and c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null)
+ TableScan [TS_0] (rows=80000000 width=860)
+ default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk","c_birth_month","c_birth_year"]
+ <-Map 7 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_151]
+ PartitionCols:_col0
+ Select Operator [SEL_150] (rows=40000000 width=1014)
+ Output:["_col0","_col1","_col2","_col3"]
+ Filter Operator [FIL_149] (rows=40000000 width=1014)
+ predicate:((ca_state) IN ('ND', 'WI', 'AL', 'NC', 'OK', 'MS', 'TN') and ca_address_sk is not null)
+ TableScan [TS_3] (rows=40000000 width=1014)
+ default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_county","ca_state","ca_country"]
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/tez/query20.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query20.q.out b/ql/src/test/results/clientpositive/perf/tez/query20.q.out
index 7d126a8..cfa58f1 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query20.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query20.q.out
@@ -99,9 +99,9 @@ Stage-0
<-Map 9 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_69]
PartitionCols:_col0
- Select Operator [SEL_68] (rows=231000 width=1436)
+ Select Operator [SEL_68] (rows=462000 width=1436)
Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
- Filter Operator [FIL_67] (rows=231000 width=1436)
+ Filter Operator [FIL_67] (rows=462000 width=1436)
predicate:((i_category) IN ('Jewelry', 'Sports', 'Books') and i_item_sk is not null)
TableScan [TS_6] (rows=462000 width=1436)
default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_class","i_category"]
@@ -136,7 +136,7 @@ Stage-0
SHUFFLE [RS_72]
Group By Operator [GBY_71] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_70] (rows=231000 width=1436)
+ Select Operator [SEL_70] (rows=462000 width=1436)
Output:["_col0"]
Please refer to the previous Select Operator [SEL_68]
<-Reducer 8 [BROADCAST_EDGE] vectorized
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/tez/query23.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query23.q.out b/ql/src/test/results/clientpositive/perf/tez/query23.q.out
index aab3f93..4d8e319 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query23.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query23.q.out
@@ -206,9 +206,9 @@ Stage-0
<-Map 18 [SIMPLE_EDGE] vectorized
PARTITION_ONLY_SHUFFLE [RS_613]
PartitionCols:_col0
- Select Operator [SEL_612] (rows=36525 width=1119)
+ Select Operator [SEL_612] (rows=73049 width=1119)
Output:["_col0","_col1"]
- Filter Operator [FIL_611] (rows=36525 width=1119)
+ Filter Operator [FIL_611] (rows=73049 width=1119)
predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null)
TableScan [TS_9] (rows=73049 width=1119)
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date","d_year"]
@@ -229,7 +229,7 @@ Stage-0
PARTITION_ONLY_SHUFFLE [RS_616]
Group By Operator [GBY_615] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_614] (rows=36525 width=1119)
+ Select Operator [SEL_614] (rows=73049 width=1119)
Output:["_col0"]
Please refer to the previous Select Operator [SEL_612]
<-Reducer 21 [BROADCAST_EDGE] vectorized
@@ -367,9 +367,9 @@ Stage-0
<-Map 30 [SIMPLE_EDGE] vectorized
PARTITION_ONLY_SHUFFLE [RS_652]
PartitionCols:_col0
- Select Operator [SEL_649] (rows=36525 width=1119)
+ Select Operator [SEL_649] (rows=73049 width=1119)
Output:["_col0"]
- Filter Operator [FIL_648] (rows=36525 width=1119)
+ Filter Operator [FIL_648] (rows=73049 width=1119)
predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null)
TableScan [TS_36] (rows=73049 width=1119)
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"]
@@ -390,7 +390,7 @@ Stage-0
PARTITION_ONLY_SHUFFLE [RS_657]
Group By Operator [GBY_655] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_653] (rows=36525 width=1119)
+ Select Operator [SEL_653] (rows=73049 width=1119)
Output:["_col0"]
Please refer to the previous Select Operator [SEL_649]
<-Reducer 37 [CUSTOM_SIMPLE_EDGE] vectorized
@@ -481,7 +481,7 @@ Stage-0
PARTITION_ONLY_SHUFFLE [RS_656]
Group By Operator [GBY_654] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_651] (rows=36525 width=1119)
+ Select Operator [SEL_651] (rows=73049 width=1119)
Output:["_col0"]
Please refer to the previous Select Operator [SEL_649]
<-Reducer 29 [CUSTOM_SIMPLE_EDGE] vectorized
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/tez/query27.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query27.q.out b/ql/src/test/results/clientpositive/perf/tez/query27.q.out
index 7ea13c8..20da0af 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query27.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query27.q.out
@@ -101,9 +101,9 @@ Stage-0
<-Map 12 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_119]
PartitionCols:_col0
- Select Operator [SEL_118] (rows=852 width=1910)
+ Select Operator [SEL_118] (rows=1704 width=1910)
Output:["_col0","_col1"]
- Filter Operator [FIL_117] (rows=852 width=1910)
+ Filter Operator [FIL_117] (rows=1704 width=1910)
predicate:((s_state) IN ('SD', 'FL', 'MI', 'LA', 'MO', 'SC') and s_store_sk is not null)
TableScan [TS_9] (rows=1704 width=1910)
default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"]
@@ -163,7 +163,7 @@ Stage-0
SHUFFLE [RS_122]
Group By Operator [GBY_121] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_120] (rows=852 width=1910)
+ Select Operator [SEL_120] (rows=1704 width=1910)
Output:["_col0"]
Please refer to the previous Select Operator [SEL_118]
<-Reducer 15 [BROADCAST_EDGE] vectorized
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/tez/query29.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query29.q.out b/ql/src/test/results/clientpositive/perf/tez/query29.q.out
index 9bfcdfa..b051622 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query29.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query29.q.out
@@ -289,9 +289,9 @@ Stage-0
<-Map 6 [SIMPLE_EDGE] vectorized
PARTITION_ONLY_SHUFFLE [RS_208]
PartitionCols:_col0
- Select Operator [SEL_207] (rows=36525 width=1119)
+ Select Operator [SEL_207] (rows=73049 width=1119)
Output:["_col0"]
- Filter Operator [FIL_206] (rows=36525 width=1119)
+ Filter Operator [FIL_206] (rows=73049 width=1119)
predicate:((d_year) IN (1999, 2000, 2001) and d_date_sk is not null)
TableScan [TS_3] (rows=73049 width=1119)
default@date_dim,d3,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"]
@@ -312,7 +312,7 @@ Stage-0
PARTITION_ONLY_SHUFFLE [RS_211]
Group By Operator [GBY_210] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_209] (rows=36525 width=1119)
+ Select Operator [SEL_209] (rows=73049 width=1119)
Output:["_col0"]
Please refer to the previous Select Operator [SEL_207]
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/tez/query34.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query34.q.out b/ql/src/test/results/clientpositive/perf/tez/query34.q.out
index 9b7b482..994e602 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query34.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query34.q.out
@@ -115,9 +115,9 @@ Stage-0
<-Map 14 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_119]
PartitionCols:_col0
- Select Operator [SEL_118] (rows=852 width=1910)
+ Select Operator [SEL_118] (rows=1704 width=1910)
Output:["_col0"]
- Filter Operator [FIL_117] (rows=852 width=1910)
+ Filter Operator [FIL_117] (rows=1704 width=1910)
predicate:((s_county) IN ('Mobile County', 'Maverick County', 'Huron County', 'Kittitas County', 'Fairfield County', 'Jackson County', 'Barrow County', 'Pennington County') and s_store_sk is not null)
TableScan [TS_12] (rows=1704 width=1910)
default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_county"]
@@ -132,7 +132,7 @@ Stage-0
Select Operator [SEL_11] (rows=1200 width=107)
Output:["_col0"]
Filter Operator [FIL_55] (rows=1200 width=107)
- predicate:(((hd_buy_potential = '>10000') or (hd_buy_potential = 'unknown')) and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.2D)) ELSE (null) END and hd_demo_sk is not null)
+ predicate:((hd_buy_potential) IN ('>10000', 'unknown') and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.2D)) ELSE (null) END and hd_demo_sk is not null)
TableScan [TS_9] (rows=7200 width=107)
default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_buy_potential","hd_dep_count","hd_vehicle_count"]
<-Reducer 6 [SIMPLE_EDGE]
@@ -143,9 +143,9 @@ Stage-0
<-Map 10 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_109]
PartitionCols:_col0
- Select Operator [SEL_108] (rows=8116 width=1119)
+ Select Operator [SEL_108] (rows=16232 width=1119)
Output:["_col0"]
- Filter Operator [FIL_107] (rows=8116 width=1119)
+ Filter Operator [FIL_107] (rows=16232 width=1119)
predicate:((d_dom BETWEEN 1 AND 3 or d_dom BETWEEN 25 AND 28) and (d_year) IN (2000, 2001, 2002) and d_date_sk is not null)
TableScan [TS_6] (rows=73049 width=1119)
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_dom"]
@@ -166,7 +166,7 @@ Stage-0
SHUFFLE [RS_112]
Group By Operator [GBY_111] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_110] (rows=8116 width=1119)
+ Select Operator [SEL_110] (rows=16232 width=1119)
Output:["_col0"]
Please refer to the previous Select Operator [SEL_108]
<-Reducer 13 [BROADCAST_EDGE] vectorized
@@ -188,7 +188,7 @@ Stage-0
SHUFFLE [RS_122]
Group By Operator [GBY_121] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_120] (rows=852 width=1910)
+ Select Operator [SEL_120] (rows=1704 width=1910)
Output:["_col0"]
Please refer to the previous Select Operator [SEL_118]
<-Reducer 4 [BROADCAST_EDGE] vectorized
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/tez/query36.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query36.q.out b/ql/src/test/results/clientpositive/perf/tez/query36.q.out
index c86c9e4..bc9a40e 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query36.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query36.q.out
@@ -121,9 +121,9 @@ Stage-0
<-Map 10 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_93]
PartitionCols:_col0
- Select Operator [SEL_92] (rows=852 width=1910)
+ Select Operator [SEL_92] (rows=1704 width=1910)
Output:["_col0"]
- Filter Operator [FIL_91] (rows=852 width=1910)
+ Filter Operator [FIL_91] (rows=1704 width=1910)
predicate:((s_state) IN ('SD', 'FL', 'MI', 'LA', 'MO', 'SC', 'AL', 'GA') and s_store_sk is not null)
TableScan [TS_6] (rows=1704 width=1910)
default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"]
@@ -158,7 +158,7 @@ Stage-0
SHUFFLE [RS_96]
Group By Operator [GBY_95] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_94] (rows=852 width=1910)
+ Select Operator [SEL_94] (rows=1704 width=1910)
Output:["_col0"]
Please refer to the previous Select Operator [SEL_92]
<-Reducer 13 [BROADCAST_EDGE] vectorized
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/tez/query37.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query37.q.out b/ql/src/test/results/clientpositive/perf/tez/query37.q.out
index 2b3ae52..6b5ce3e 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query37.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query37.q.out
@@ -67,9 +67,9 @@ Stage-0
<-Map 5 [SIMPLE_EDGE] vectorized
PARTITION_ONLY_SHUFFLE [RS_76]
PartitionCols:_col0
- Select Operator [SEL_75] (rows=25666 width=1436)
+ Select Operator [SEL_75] (rows=51333 width=1436)
Output:["_col0","_col1","_col2","_col3"]
- Filter Operator [FIL_74] (rows=25666 width=1436)
+ Filter Operator [FIL_74] (rows=51333 width=1436)
predicate:((i_manufact_id) IN (678, 964, 918, 849) and i_current_price BETWEEN 22 AND 52 and i_item_sk is not null)
TableScan [TS_3] (rows=462000 width=1436)
default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_manufact_id"]
@@ -113,7 +113,7 @@ Stage-0
PARTITION_ONLY_SHUFFLE [RS_79]
Group By Operator [GBY_78] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_77] (rows=25666 width=1436)
+ Select Operator [SEL_77] (rows=51333 width=1436)
Output:["_col0"]
Please refer to the previous Select Operator [SEL_75]
<-Reducer 9 [BROADCAST_EDGE] vectorized
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/tez/query45.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query45.q.out b/ql/src/test/results/clientpositive/perf/tez/query45.q.out
index edb047d..6458811 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query45.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query45.q.out
@@ -91,9 +91,9 @@ Stage-0
SHUFFLE [RS_150]
Group By Operator [GBY_148] (rows=1 width=16)
Output:["_col0","_col1"],aggregations:["count()","count(i_item_id)"]
- Select Operator [SEL_145] (rows=231000 width=1436)
+ Select Operator [SEL_145] (rows=462000 width=1436)
Output:["i_item_id"]
- Filter Operator [FIL_142] (rows=231000 width=1436)
+ Filter Operator [FIL_142] (rows=462000 width=1436)
predicate:(i_item_sk) IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29)
TableScan [TS_6] (rows=462000 width=1436)
default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"]
@@ -122,18 +122,18 @@ Stage-0
<-Reducer 12 [ONE_TO_ONE_EDGE] vectorized
FORWARD [RS_153]
PartitionCols:_col0
- Select Operator [SEL_152] (rows=115500 width=1436)
+ Select Operator [SEL_152] (rows=231000 width=1436)
Output:["_col0","_col1"]
- Group By Operator [GBY_151] (rows=115500 width=1436)
+ Group By Operator [GBY_151] (rows=231000 width=1436)
Output:["_col0"],keys:KEY._col0
<-Map 8 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_149]
PartitionCols:_col0
- Group By Operator [GBY_147] (rows=231000 width=1436)
+ Group By Operator [GBY_147] (rows=462000 width=1436)
Output:["_col0"],keys:i_item_id
- Select Operator [SEL_144] (rows=231000 width=1436)
+ Select Operator [SEL_144] (rows=462000 width=1436)
Output:["i_item_id"]
- Filter Operator [FIL_141] (rows=231000 width=1436)
+ Filter Operator [FIL_141] (rows=462000 width=1436)
predicate:(i_item_sk) IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29)
Please refer to the previous TableScan [TS_6]
<-Reducer 15 [SIMPLE_EDGE]
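A pattern running through the plan diffs above: Filter/Select operators whose row estimates were previously halved per IN predicate (e.g. 462000 -> 231000, 73049 -> 36525, 1704 -> 852) now retain the full input row count when column statistics are absent (Col:NONE). The sketch below is illustrative only, not Hive's actual estimator (the real logic lives in the optimizer's stats annotation rules); the function names and the 0.5 default selectivity are assumptions chosen to reproduce the numbers seen in these diffs.

```python
# Hypothetical illustration of the stats change visible in the diffs above.
# Old behavior: apply a fixed default selectivity for each IN predicate,
# which compounds and underestimates rows when stats are missing.
def naive_estimate(rows: int, num_in_predicates: int, default_sel: float = 0.5) -> int:
    for _ in range(num_in_predicates):
        rows = int(rows * default_sel)
    return rows

# New behavior (as reflected in the updated .q.out files): without column
# stats, do not reduce the estimate for the IN predicate at all.
def conservative_estimate(rows: int, num_in_predicates: int) -> int:
    return rows

print(naive_estimate(462000, 1))         # matches the old item-table estimates
print(conservative_estimate(462000, 1))  # matches the updated estimates
```

With real column statistics available, Hive would instead derive selectivity from NDV and value ranges; the flat pass-through only applies to the Col:NONE cases shown here.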
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/tez/query85.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query85.q.out b/ql/src/test/results/clientpositive/perf/tez/query85.q.out
index 4e42d69..bd6f45f 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query85.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query85.q.out
@@ -167,16 +167,16 @@ POSTHOOK: type: QUERY
Plan optimized by CBO.
Vertex dependency in root stage
-Map 11 <- Reducer 14 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE)
+Map 1 <- Reducer 12 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE)
Reducer 10 <- Reducer 9 (SIMPLE_EDGE)
-Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE)
-Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE)
+Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE)
+Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE)
Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE)
-Reducer 3 <- Map 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
-Reducer 4 <- Map 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
-Reducer 5 <- Map 15 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
-Reducer 6 <- Map 17 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
-Reducer 7 <- Map 18 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
+Reducer 3 <- Map 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 4 <- Map 14 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
+Reducer 5 <- Map 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+Reducer 6 <- Map 15 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+Reducer 7 <- Map 16 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
Reducer 8 <- Map 18 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
Reducer 9 <- Reducer 8 (SIMPLE_EDGE)
@@ -186,144 +186,144 @@ Stage-0
Stage-1
Reducer 10 vectorized
File Output Operator [FS_244]
- Limit [LIM_243] (rows=100 width=385)
+ Limit [LIM_243] (rows=100 width=1014)
Number of rows:100
- Select Operator [SEL_242] (rows=1023990 width=385)
+ Select Operator [SEL_242] (rows=4436665 width=1014)
Output:["_col0","_col1","_col2","_col3"]
<-Reducer 9 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_241]
- Select Operator [SEL_240] (rows=1023990 width=385)
+ Select Operator [SEL_240] (rows=4436665 width=1014)
Output:["_col4","_col5","_col6","_col7"]
- Group By Operator [GBY_239] (rows=1023990 width=385)
+ Group By Operator [GBY_239] (rows=4436665 width=1014)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)"],keys:KEY._col0
<-Reducer 8 [SIMPLE_EDGE]
SHUFFLE [RS_49]
PartitionCols:_col0
- Group By Operator [GBY_48] (rows=2047980 width=385)
- Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col12)","count(_col12)","sum(_col7)","count(_col7)","sum(_col6)","count(_col6)"],keys:_col22
- Merge Join Operator [MERGEJOIN_206] (rows=2047980 width=385)
- Conds:RS_44._col3, _col24, _col25=RS_237._col0, _col1, _col2(Inner),Output:["_col6","_col7","_col12","_col22"]
+ Group By Operator [GBY_48] (rows=8873331 width=1014)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","count(_col4)","sum(_col16)","count(_col16)","sum(_col15)","count(_col15)"],keys:_col28
+ Merge Join Operator [MERGEJOIN_206] (rows=8873331 width=1014)
+ Conds:RS_44._col13=RS_238._col0(Inner),Output:["_col4","_col15","_col16","_col28"]
<-Map 18 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_237]
- PartitionCols:_col0, _col1, _col2
- Select Operator [SEL_236] (rows=1861800 width=385)
- Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_235] (rows=1861800 width=385)
- predicate:(((cd_education_status = '4 yr Degree') or (cd_education_status = 'Primary') or (cd_education_status = 'Advanced Degree')) and ((cd_marital_status = 'M') or (cd_marital_status = 'D') or (cd_marital_status = 'U')) and cd_demo_sk is not null and cd_education_status is not null and cd_marital_status is not null)
- TableScan [TS_21] (rows=1861800 width=385)
- default@customer_demographics,cd2,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"]
+ SHUFFLE [RS_238]
+ PartitionCols:_col0
+ Select Operator [SEL_237] (rows=72 width=200)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_236] (rows=72 width=200)
+ predicate:r_reason_sk is not null
+ TableScan [TS_21] (rows=72 width=200)
+ default@reason,reason,Tbl:COMPLETE,Col:NONE,Output:["r_reason_sk","r_reason_desc"]
<-Reducer 7 [SIMPLE_EDGE]
SHUFFLE [RS_44]
- PartitionCols:_col3, _col24, _col25
- Filter Operator [FIL_43] (rows=393687 width=135)
- predicate:(((_col24 = 'D') and (_col25 = 'Primary') and _col13 BETWEEN 50 AND 100) or ((_col24 = 'M') and (_col25 = '4 yr Degree') and _col13 BETWEEN 100 AND 150) or ((_col24 = 'U') and (_col25 = 'Advanced Degree') and _col13 BETWEEN 150 AND 200))
- Merge Join Operator [MERGEJOIN_205] (rows=4724246 width=135)
- Conds:RS_40._col1=RS_238._col0(Inner),Output:["_col3","_col6","_col7","_col12","_col13","_col22","_col24","_col25"]
- <-Map 18 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_238]
- PartitionCols:_col0
- Please refer to the previous Select Operator [SEL_236]
- <-Reducer 6 [SIMPLE_EDGE]
- SHUFFLE [RS_40]
- PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_204] (rows=4294769 width=135)
- Conds:RS_37._col4=RS_234._col0(Inner),Output:["_col1","_col3","_col6","_col7","_col12","_col13","_col22"]
- <-Map 17 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_234]
+ PartitionCols:_col13
+ Merge Join Operator [MERGEJOIN_205] (rows=8066665 width=1014)
+ Conds:RS_41._col2=RS_217._col0(Inner),Output:["_col4","_col13","_col15","_col16"]
+ <-Map 16 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_217]
+ PartitionCols:_col0
+ Select Operator [SEL_216] (rows=4602 width=585)
+ Output:["_col0"]
+ Filter Operator [FIL_215] (rows=4602 width=585)
+ predicate:wp_web_page_sk is not null
+ TableScan [TS_18] (rows=4602 width=585)
+ default@web_page,web_page,Tbl:COMPLETE,Col:NONE,Output:["wp_web_page_sk"]
+ <-Reducer 6 [SIMPLE_EDGE]
+ SHUFFLE [RS_41]
+ PartitionCols:_col2
+ Filter Operator [FIL_40] (rows=7333332 width=1014)
+ predicate:((((_col24 = 'KY') or (_col24 = 'GA') or (_col24 = 'NM')) and _col6 BETWEEN 100 AND 200) or (((_col24 = 'MT') or (_col24 = 'OR') or (_col24 = 'IN')) and _col6 BETWEEN 150 AND 300) or (((_col24 = 'WI') or (_col24 = 'MO') or (_col24 = 'WV')) and _col6 BETWEEN 50 AND 250))
+ Merge Join Operator [MERGEJOIN_204] (rows=22000000 width=1014)
+ Conds:RS_37._col11=RS_235._col0(Inner),Output:["_col2","_col4","_col6","_col13","_col15","_col16","_col24"]
+ <-Map 15 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_235]
PartitionCols:_col0
- Select Operator [SEL_233] (rows=72 width=200)
+ Select Operator [SEL_234] (rows=20000000 width=1014)
Output:["_col0","_col1"]
- Filter Operator [FIL_232] (rows=72 width=200)
- predicate:r_reason_sk is not null
- TableScan [TS_15] (rows=72 width=200)
- default@reason,reason,Tbl:COMPLETE,Col:NONE,Output:["r_reason_sk","r_reason_desc"]
+ Filter Operator [FIL_233] (rows=20000000 width=1014)
+ predicate:((ca_country = 'United States') and ca_address_sk is not null)
+ TableScan [TS_15] (rows=40000000 width=1014)
+ default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"]
<-Reducer 5 [SIMPLE_EDGE]
SHUFFLE [RS_37]
- PartitionCols:_col4
- Merge Join Operator [MERGEJOIN_203] (rows=3904336 width=135)
- Conds:RS_34._col8=RS_220._col0(Inner),Output:["_col1","_col3","_col4","_col6","_col7","_col12","_col13"]
- <-Map 15 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_220]
- PartitionCols:_col0
- Select Operator [SEL_219] (rows=36524 width=1119)
- Output:["_col0"]
- Filter Operator [FIL_218] (rows=36524 width=1119)
- predicate:((d_year = 1998) and d_date_sk is not null)
- TableScan [TS_12] (rows=73049 width=1119)
- default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"]
+ PartitionCols:_col11
+ Merge Join Operator [MERGEJOIN_203] (rows=5856506 width=135)
+ Conds:RS_34._col12, _col18, _col19=RS_231._col0, _col1, _col2(Inner),Output:["_col2","_col4","_col6","_col11","_col13","_col15","_col16"]
+ <-Map 14 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_231]
+ PartitionCols:_col0, _col1, _col2
+ Select Operator [SEL_230] (rows=1861800 width=385)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_229] (rows=1861800 width=385)
+ predicate:((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null)
+ TableScan [TS_12] (rows=1861800 width=385)
+ default@customer_demographics,cd2,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"]
<-Reducer 4 [SIMPLE_EDGE]
SHUFFLE [RS_34]
- PartitionCols:_col8
- Merge Join Operator [MERGEJOIN_202] (rows=3549397 width=135)
- Conds:RS_31._col10=RS_212._col0(Inner),Output:["_col1","_col3","_col4","_col6","_col7","_col8","_col12","_col13"]
- <-Map 13 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_212]
- PartitionCols:_col0
- Select Operator [SEL_211] (rows=4602 width=585)
- Output:["_col0"]
- Filter Operator [FIL_210] (rows=4602 width=585)
- predicate:wp_web_page_sk is not null
- TableScan [TS_9] (rows=4602 width=585)
- default@web_page,web_page,Tbl:COMPLETE,Col:NONE,Output:["wp_web_page_sk"]
- <-Reducer 3 [SIMPLE_EDGE]
- SHUFFLE [RS_31]
- PartitionCols:_col10
- Filter Operator [FIL_30] (rows=3226725 width=135)
- predicate:(((_col16) IN ('KY', 'GA', 'NM') and _col14 BETWEEN 100 AND 200) or ((_col16) IN ('MT', 'OR', 'IN') and _col14 BETWEEN 150 AND 300) or ((_col16) IN ('WI', 'MO', 'WV') and _col14 BETWEEN 50 AND 250))
- Merge Join Operator [MERGEJOIN_201] (rows=19360357 width=135)
- Conds:RS_27._col2=RS_231._col0(Inner),Output:["_col1","_col3","_col4","_col6","_col7","_col8","_col10","_col12","_col13","_col14","_col16"]
- <-Map 12 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_231]
- PartitionCols:_col0
- Select Operator [SEL_230] (rows=10000000 width=1014)
- Output:["_col0","_col1"]
- Filter Operator [FIL_229] (rows=10000000 width=1014)
- predicate:((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null)
- TableScan [TS_6] (rows=40000000 width=1014)
- default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"]
+ PartitionCols:_col12, _col18, _col19
+ Filter Operator [FIL_33] (rows=5324097 width=135)
+ predicate:(((_col18 = 'D') and (_col19 = 'Primary') and _col5 BETWEEN 50 AND 100) or ((_col18 = 'M') and (_col19 = '4 yr Degree') and _col5 BETWEEN 100 AND 150) or ((_col18 = 'U') and (_col19 = 'Advanced Degree') and _col5 BETWEEN 150 AND 200))
+ Merge Join Operator [MERGEJOIN_202] (rows=63889183 width=135)
+ Conds:RS_30._col10=RS_232._col0(Inner),Output:["_col2","_col4","_col5","_col6","_col11","_col12","_col13","_col15","_col16","_col18","_col19"]
+ <-Map 14 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_232]
+ PartitionCols:_col0
+ Please refer to the previous Select Operator [SEL_230]
+ <-Reducer 3 [SIMPLE_EDGE]
+ SHUFFLE [RS_30]
+ PartitionCols:_col10
+ Merge Join Operator [MERGEJOIN_201] (rows=58081075 width=135)
+ Conds:RS_27._col1, _col3=RS_228._col0, _col5(Inner),Output:["_col2","_col4","_col5","_col6","_col10","_col11","_col12","_col13","_col15","_col16"]
+ <-Map 13 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_228]
+ PartitionCols:_col0, _col5
+ Select Operator [SEL_227] (rows=14398467 width=92)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"]
+ Filter Operator [FIL_226] (rows=14398467 width=92)
+ predicate:(wr_item_sk is not null and wr_order_number is not null and wr_reason_sk is not null and wr_refunded_addr_sk is not null and wr_refunded_cdemo_sk is not null and wr_returning_cdemo_sk is not null)
+ TableScan [TS_6] (rows=14398467 width=92)
+ default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_item_sk","wr_refunded_cdemo_sk","wr_refunded_addr_sk","wr_returning_cdemo_sk","wr_reason_sk","wr_order_number","wr_fee","wr_refunded_cash"]
<-Reducer 2 [SIMPLE_EDGE]
SHUFFLE [RS_27]
- PartitionCols:_col2
- Merge Join Operator [MERGEJOIN_200] (rows=17600325 width=135)
- Conds:RS_209._col0, _col5=RS_228._col1, _col3(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col8","_col10","_col12","_col13","_col14"]
- <-Map 1 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_209]
- PartitionCols:_col0, _col5
- Select Operator [SEL_208] (rows=14398467 width=92)
- Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"]
- Filter Operator [FIL_207] (rows=14398467 width=92)
- predicate:(wr_item_sk is not null and wr_order_number is not null and wr_reason_sk is not null and wr_refunded_addr_sk is not null and wr_refunded_cdemo_sk is not null and wr_returning_cdemo_sk is not null)
- TableScan [TS_0] (rows=14398467 width=92)
- default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_item_sk","wr_refunded_cdemo_sk","wr_refunded_addr_sk","wr_returning_cdemo_sk","wr_reason_sk","wr_order_number","wr_fee","wr_refunded_cash"]
+ PartitionCols:_col1, _col3
+ Merge Join Operator [MERGEJOIN_200] (rows=52800977 width=135)
+ Conds:RS_225._col0=RS_209._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"]
<-Map 11 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_228]
- PartitionCols:_col1, _col3
- Select Operator [SEL_227] (rows=16000296 width=135)
+ SHUFFLE [RS_209]
+ PartitionCols:_col0
+ Select Operator [SEL_208] (rows=36524 width=1119)
+ Output:["_col0"]
+ Filter Operator [FIL_207] (rows=36524 width=1119)
+ predicate:((d_year = 1998) and d_date_sk is not null)
+ TableScan [TS_3] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"]
+ <-Map 1 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_225]
+ PartitionCols:_col0
+ Select Operator [SEL_224] (rows=48000888 width=135)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
- Filter Operator [FIL_226] (rows=16000296 width=135)
- predicate:((ws_net_profit BETWEEN 100 AND 200 or ws_net_profit BETWEEN 150 AND 300 or ws_net_profit BETWEEN 50 AND 250) and (ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and (ws_sold_date_sk BETWEEN DynamicValue(RS_35_date_dim_d_date_sk_min) AND DynamicValue(RS_35_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_35_date_dim_d_date_sk_bloom_filter))) and (ws_web_page_sk BETWEEN DynamicValue(RS_32_web_page_wp_web_page_sk_min) AND DynamicValue(RS_32_web_page_wp_web_page_sk_max) and in_bloom_filter(ws_web_page_sk, DynamicValue(RS_32_web_page_wp_web_page_sk_bloom_filter))) and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk is not null and ws_web_page_sk is not null)
- TableScan [TS_3] (rows=144002668 width=135)
+ Filter Operator [FIL_223] (rows=48000888 width=135)
+ predicate:((ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and (ws_sold_date_sk BETWEEN DynamicValue(RS_25_date_dim_d_date_sk_min) AND DynamicValue(RS_25_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_25_date_dim_d_date_sk_bloom_filter))) and (ws_web_page_sk BETWEEN DynamicValue(RS_42_web_page_wp_web_page_sk_min) AND DynamicValue(RS_42_web_page_wp_web_page_sk_max) and in_bloom_filter(ws_web_page_sk, DynamicValue(RS_42_web_page_wp_web_page_sk_bloom_filter))) and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk is not null and ws_web_page_sk is not null)
+ TableScan [TS_0] (rows=144002668 width=135)
default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_web_page_sk","ws_order_number","ws_quantity","ws_sales_price","ws_net_profit"]
- <-Reducer 14 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_217]
- Group By Operator [GBY_216] (rows=1 width=12)
+ <-Reducer 12 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_214]
+ Group By Operator [GBY_213] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
- <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized
- SHUFFLE [RS_215]
- Group By Operator [GBY_214] (rows=1 width=12)
+ <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_212]
+ Group By Operator [GBY_211] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_213] (rows=4602 width=585)
+ Select Operator [SEL_210] (rows=36524 width=1119)
Output:["_col0"]
- Please refer to the previous Select Operator [SEL_211]
- <-Reducer 16 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_225]
- Group By Operator [GBY_224] (rows=1 width=12)
+ Please refer to the previous Select Operator [SEL_208]
+ <-Reducer 17 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_222]
+ Group By Operator [GBY_221] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
- <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized
- SHUFFLE [RS_223]
- Group By Operator [GBY_222] (rows=1 width=12)
+ <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_220]
+ Group By Operator [GBY_219] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_221] (rows=36524 width=1119)
+ Select Operator [SEL_218] (rows=4602 width=585)
Output:["_col0"]
- Please refer to the previous Select Operator [SEL_219]
+ Please refer to the previous Select Operator [SEL_216]
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/tez/query89.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query89.q.out b/ql/src/test/results/clientpositive/perf/tez/query89.q.out
index ee3374e..c120cd9 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query89.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query89.q.out
@@ -85,21 +85,21 @@ Stage-0
Select Operator [SEL_29] (rows=383325119 width=88)
Output:["avg_window_0","_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
PTF Operator [PTF_28] (rows=383325119 width=88)
- Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 ASC NULLS FIRST, _col0 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST","partition by:":"_col2, _col0, _col4, _col5"}]
+ Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS FIRST, _col1 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST","partition by:":"_col3, _col1, _col4, _col5"}]
Select Operator [SEL_27] (rows=383325119 width=88)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
<-Reducer 5 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_113]
- PartitionCols:_col2, _col0, _col4, _col5
+ PartitionCols:_col3, _col1, _col4, _col5
Group By Operator [GBY_112] (rows=383325119 width=88)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5
<-Reducer 4 [SIMPLE_EDGE]
SHUFFLE [RS_23]
PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5
Group By Operator [GBY_22] (rows=766650239 width=88)
- Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col3)"],keys:_col5, _col6, _col7, _col10, _col12, _col13
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col3)"],keys:_col6, _col8, _col9, _col10, _col12, _col13
Merge Join Operator [MERGEJOIN_84] (rows=766650239 width=88)
- Conds:RS_18._col2=RS_103._col0(Inner),Output:["_col3","_col5","_col6","_col7","_col10","_col12","_col13"]
+ Conds:RS_18._col2=RS_103._col0(Inner),Output:["_col3","_col6","_col8","_col9","_col10","_col12","_col13"]
<-Map 12 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_103]
PartitionCols:_col0
@@ -113,37 +113,37 @@ Stage-0
SHUFFLE [RS_18]
PartitionCols:_col2
Merge Join Operator [MERGEJOIN_83] (rows=696954748 width=88)
- Conds:RS_15._col0=RS_95._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col7","_col10"]
+ Conds:RS_15._col1=RS_95._col0(Inner),Output:["_col2","_col3","_col6","_col8","_col9","_col10"]
<-Map 10 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_95]
PartitionCols:_col0
- Select Operator [SEL_94] (rows=36524 width=1119)
- Output:["_col0","_col2"]
- Filter Operator [FIL_93] (rows=36524 width=1119)
- predicate:((d_year = 2000) and d_date_sk is not null)
- TableScan [TS_6] (rows=73049 width=1119)
- default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
+ Select Operator [SEL_94] (rows=462000 width=1436)
+ Output:["_col0","_col1","_col2","_col3"]
+ Filter Operator [FIL_93] (rows=462000 width=1436)
+ predicate:((((i_category) IN ('Home', 'Books', 'Electronics') and (i_class) IN ('wallpaper', 'parenting', 'musical')) or ((i_category) IN ('Shoes', 'Jewelry', 'Men') and (i_class) IN ('womens', 'birdal', 'pants'))) and i_item_sk is not null)
+ TableScan [TS_6] (rows=462000 width=1436)
+ default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand","i_class","i_category"]
<-Reducer 2 [SIMPLE_EDGE]
SHUFFLE [RS_15]
- PartitionCols:_col0
+ PartitionCols:_col1
Merge Join Operator [MERGEJOIN_82] (rows=633595212 width=88)
- Conds:RS_111._col1=RS_87._col0(Inner),Output:["_col0","_col2","_col3","_col5","_col6","_col7"]
+ Conds:RS_111._col0=RS_87._col0(Inner),Output:["_col1","_col2","_col3","_col6"]
<-Map 8 [SIMPLE_EDGE] vectorized
PARTITION_ONLY_SHUFFLE [RS_87]
PartitionCols:_col0
- Select Operator [SEL_86] (rows=231000 width=1436)
- Output:["_col0","_col1","_col2","_col3"]
- Filter Operator [FIL_85] (rows=231000 width=1436)
- predicate:((((i_category) IN ('Home', 'Books', 'Electronics') and (i_class) IN ('wallpaper', 'parenting', 'musical')) or ((i_category) IN ('Shoes', 'Jewelry', 'Men') and (i_class) IN ('womens', 'birdal', 'pants'))) and ((i_category) IN ('Home', 'Books', 'Electronics') or (i_category) IN ('Shoes', 'Jewelry', 'Men')) and ((i_class) IN ('wallpaper', 'parenting', 'musical') or (i_class) IN ('womens', 'birdal', 'pants')) and i_item_sk is not null)
- TableScan [TS_3] (rows=462000 width=1436)
- default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand","i_class","i_category"]
+ Select Operator [SEL_86] (rows=36524 width=1119)
+ Output:["_col0","_col2"]
+ Filter Operator [FIL_85] (rows=36524 width=1119)
+ predicate:((d_year = 2000) and d_date_sk is not null)
+ TableScan [TS_3] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
<-Map 1 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_111]
- PartitionCols:_col1
+ PartitionCols:_col0
Select Operator [SEL_110] (rows=575995635 width=88)
Output:["_col0","_col1","_col2","_col3"]
Filter Operator [FIL_109] (rows=575995635 width=88)
- predicate:((ss_item_sk BETWEEN DynamicValue(RS_13_item_i_item_sk_min) AND DynamicValue(RS_13_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_13_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_19_store_s_store_sk_min) AND DynamicValue(RS_19_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_19_store_s_store_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null)
+ predicate:((ss_item_sk BETWEEN DynamicValue(RS_16_item_i_item_sk_min) AND DynamicValue(RS_16_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_16_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_19_store_s_store_sk_min) AND DynamicValue(RS_19_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_19_store_s_store_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null)
TableScan [TS_0] (rows=575995635 width=88)
default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_sales_price"]
<-Reducer 11 [BROADCAST_EDGE] vectorized
@@ -154,7 +154,7 @@ Stage-0
SHUFFLE [RS_98]
Group By Operator [GBY_97] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_96] (rows=36524 width=1119)
+ Select Operator [SEL_96] (rows=462000 width=1436)
Output:["_col0"]
Please refer to the previous Select Operator [SEL_94]
<-Reducer 13 [BROADCAST_EDGE] vectorized
@@ -176,7 +176,7 @@ Stage-0
PARTITION_ONLY_SHUFFLE [RS_90]
Group By Operator [GBY_89] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_88] (rows=231000 width=1436)
+ Select Operator [SEL_88] (rows=36524 width=1119)
Output:["_col0"]
Please refer to the previous Select Operator [SEL_86]
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/tez/query91.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query91.q.out b/ql/src/test/results/clientpositive/perf/tez/query91.q.out
index a53c7d79..7a64949 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query91.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query91.q.out
@@ -61,124 +61,122 @@ POSTHOOK: type: QUERY
Plan optimized by CBO.
Vertex dependency in root stage
-Reducer 10 <- Map 13 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
-Reducer 11 <- Map 14 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE)
-Reducer 12 <- Map 15 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE)
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
-Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE)
+Reducer 12 <- Map 14 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE)
+Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
-Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
+Reducer 5 <- Map 15 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
+Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
Stage-0
Fetch Operator
limit:-1
Stage-1
- Reducer 6 vectorized
+ Reducer 7 vectorized
File Output Operator [FS_170]
Select Operator [SEL_169] (rows=58564004 width=860)
Output:["_col0","_col1","_col2","_col3"]
- <-Reducer 5 [SIMPLE_EDGE] vectorized
+ <-Reducer 6 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_168]
Select Operator [SEL_167] (rows=58564004 width=860)
Output:["_col0","_col1","_col2","_col4"]
Group By Operator [GBY_166] (rows=58564004 width=860)
Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4
- <-Reducer 4 [SIMPLE_EDGE]
+ <-Reducer 5 [SIMPLE_EDGE]
SHUFFLE [RS_42]
PartitionCols:_col0, _col1, _col2, _col3, _col4
Group By Operator [GBY_41] (rows=117128008 width=860)
- Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col3)"],keys:_col8, _col9, _col10, _col18, _col19
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col12)"],keys:_col5, _col6, _col17, _col18, _col19
Merge Join Operator [MERGEJOIN_144] (rows=117128008 width=860)
- Conds:RS_37._col1=RS_38._col2(Inner),Output:["_col3","_col8","_col9","_col10","_col18","_col19"]
- <-Reducer 12 [SIMPLE_EDGE]
- SHUFFLE [RS_38]
- PartitionCols:_col2
- Select Operator [SEL_30] (rows=106480005 width=860)
- Output:["_col2","_col7","_col8"]
- Merge Join Operator [MERGEJOIN_143] (rows=106480005 width=860)
- Conds:RS_27._col2=RS_165._col0(Inner),Output:["_col0","_col5","_col6"]
- <-Map 15 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_165]
- PartitionCols:_col0
- Select Operator [SEL_164] (rows=3600 width=107)
- Output:["_col0"]
- Filter Operator [FIL_163] (rows=3600 width=107)
- predicate:((hd_buy_potential like '0-500%') and hd_demo_sk is not null)
- TableScan [TS_18] (rows=7200 width=107)
- default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_buy_potential"]
- <-Reducer 11 [SIMPLE_EDGE]
- SHUFFLE [RS_27]
- PartitionCols:_col2
- Merge Join Operator [MERGEJOIN_142] (rows=96800003 width=860)
- Conds:RS_24._col3=RS_162._col0(Inner),Output:["_col0","_col2","_col5","_col6"]
- <-Map 14 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_162]
- PartitionCols:_col0
- Select Operator [SEL_161] (rows=20000000 width=1014)
- Output:["_col0"]
- Filter Operator [FIL_160] (rows=20000000 width=1014)
- predicate:((ca_gmt_offset = -7) and ca_address_sk is not null)
- TableScan [TS_15] (rows=40000000 width=1014)
- default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_gmt_offset"]
- <-Reducer 10 [SIMPLE_EDGE]
- SHUFFLE [RS_24]
- PartitionCols:_col3
- Merge Join Operator [MERGEJOIN_141] (rows=88000001 width=860)
- Conds:RS_156._col1=RS_159._col0(Inner),Output:["_col0","_col2","_col3","_col5","_col6"]
- <-Map 13 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_159]
- PartitionCols:_col0
- Select Operator [SEL_158] (rows=930900 width=385)
- Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_157] (rows=930900 width=385)
- predicate:((((cd_marital_status = 'M') and (cd_education_status = 'Unknown')) or ((cd_marital_status = 'W') and (cd_education_status = 'Advanced Degree'))) and ((cd_education_status = 'Unknown') or (cd_education_status = 'Advanced Degree')) and ((cd_marital_status = 'M') or (cd_marital_status = 'W')) and cd_demo_sk is not null)
- TableScan [TS_12] (rows=1861800 width=385)
- default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"]
- <-Map 9 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_156]
- PartitionCols:_col1
- Select Operator [SEL_155] (rows=80000000 width=860)
- Output:["_col0","_col1","_col2","_col3"]
- Filter Operator [FIL_154] (rows=80000000 width=860)
- predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_current_hdemo_sk is not null and c_customer_sk is not null)
- TableScan [TS_9] (rows=80000000 width=860)
- default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_hdemo_sk","c_current_addr_sk"]
- <-Reducer 3 [SIMPLE_EDGE]
+ Conds:RS_37._col2=RS_165._col0(Inner),Output:["_col5","_col6","_col12","_col17","_col18","_col19"]
+ <-Map 15 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_165]
+ PartitionCols:_col0
+ Select Operator [SEL_164] (rows=3600 width=107)
+ Output:["_col0"]
+ Filter Operator [FIL_163] (rows=3600 width=107)
+ predicate:((hd_buy_potential like '0-500%') and hd_demo_sk is not null)
+ TableScan [TS_25] (rows=7200 width=107)
+ default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_buy_potential"]
+ <-Reducer 4 [SIMPLE_EDGE]
SHUFFLE [RS_37]
- PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_140] (rows=34846646 width=106)
- Conds:RS_34._col2=RS_153._col0(Inner),Output:["_col1","_col3","_col8","_col9","_col10"]
- <-Map 8 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_153]
- PartitionCols:_col0
- Select Operator [SEL_152] (rows=60 width=2045)
- Output:["_col0","_col1","_col2","_col3"]
- Filter Operator [FIL_151] (rows=60 width=2045)
- predicate:cc_call_center_sk is not null
- TableScan [TS_6] (rows=60 width=2045)
- default@call_center,call_center,Tbl:COMPLETE,Col:NONE,Output:["cc_call_center_sk","cc_call_center_id","cc_name","cc_manager"]
- <-Reducer 2 [SIMPLE_EDGE]
- SHUFFLE [RS_34]
- PartitionCols:_col2
- Merge Join Operator [MERGEJOIN_139] (rows=31678769 width=106)
- Conds:RS_147._col0=RS_150._col0(Inner),Output:["_col1","_col2","_col3"]
- <-Map 1 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_147]
+ PartitionCols:_col2
+ Merge Join Operator [MERGEJOIN_143] (rows=106480005 width=860)
+ Conds:RS_34._col0=RS_35._col1(Inner),Output:["_col2","_col5","_col6","_col12","_col17","_col18","_col19"]
+ <-Reducer 12 [SIMPLE_EDGE]
+ SHUFFLE [RS_35]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_142] (rows=34846646 width=106)
+ Conds:RS_21._col2=RS_162._col0(Inner),Output:["_col1","_col3","_col8","_col9","_col10"]
+ <-Map 14 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_162]
PartitionCols:_col0
- Select Operator [SEL_146] (rows=28798881 width=106)
+ Select Operator [SEL_161] (rows=60 width=2045)
Output:["_col0","_col1","_col2","_col3"]
- Filter Operator [FIL_145] (rows=28798881 width=106)
- predicate:(cr_call_center_sk is not null and cr_returned_date_sk is not null and cr_returning_customer_sk is not null)
- TableScan [TS_0] (rows=28798881 width=106)
- default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_returned_date_sk","cr_returning_customer_sk","cr_call_center_sk","cr_net_loss"]
- <-Map 7 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_150]
+ Filter Operator [FIL_160] (rows=60 width=2045)
+ predicate:cc_call_center_sk is not null
+ TableScan [TS_15] (rows=60 width=2045)
+ default@call_center,call_center,Tbl:COMPLETE,Col:NONE,Output:["cc_call_center_sk","cc_call_center_id","cc_name","cc_manager"]
+ <-Reducer 11 [SIMPLE_EDGE]
+ SHUFFLE [RS_21]
+ PartitionCols:_col2
+ Merge Join Operator [MERGEJOIN_141] (rows=31678769 width=106)
+ Conds:RS_156._col0=RS_159._col0(Inner),Output:["_col1","_col2","_col3"]
+ <-Map 10 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_156]
+ PartitionCols:_col0
+ Select Operator [SEL_155] (rows=28798881 width=106)
+ Output:["_col0","_col1","_col2","_col3"]
+ Filter Operator [FIL_154] (rows=28798881 width=106)
+ predicate:(cr_call_center_sk is not null and cr_returned_date_sk is not null and cr_returning_customer_sk is not null)
+ TableScan [TS_9] (rows=28798881 width=106)
+ default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_returned_date_sk","cr_returning_customer_sk","cr_call_center_sk","cr_net_loss"]
+ <-Map 13 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_159]
+ PartitionCols:_col0
+ Select Operator [SEL_158] (rows=18262 width=1119)
+ Output:["_col0"]
+ Filter Operator [FIL_157] (rows=18262 width=1119)
+ predicate:((d_moy = 11) and (d_year = 1999) and d_date_sk is not null)
+ TableScan [TS_12] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
+ <-Reducer 3 [SIMPLE_EDGE]
+ SHUFFLE [RS_34]
+ PartitionCols:_col0
+ Merge Join Operator [MERGEJOIN_140] (rows=96800003 width=860)
+ Conds:RS_31._col3=RS_153._col0(Inner),Output:["_col0","_col2","_col5","_col6"]
+ <-Map 9 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_153]
PartitionCols:_col0
- Select Operator [SEL_149] (rows=18262 width=1119)
+ Select Operator [SEL_152] (rows=20000000 width=1014)
Output:["_col0"]
- Filter Operator [FIL_148] (rows=18262 width=1119)
- predicate:((d_moy = 11) and (d_year = 1999) and d_date_sk is not null)
- TableScan [TS_3] (rows=73049 width=1119)
- default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
+ Filter Operator [FIL_151] (rows=20000000 width=1014)
+ predicate:((ca_gmt_offset = -7) and ca_address_sk is not null)
+ TableScan [TS_6] (rows=40000000 width=1014)
+ default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_gmt_offset"]
+ <-Reducer 2 [SIMPLE_EDGE]
+ SHUFFLE [RS_31]
+ PartitionCols:_col3
+ Merge Join Operator [MERGEJOIN_139] (rows=88000001 width=860)
+ Conds:RS_147._col1=RS_150._col0(Inner),Output:["_col0","_col2","_col3","_col5","_col6"]
+ <-Map 1 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_147]
+ PartitionCols:_col1
+ Select Operator [SEL_146] (rows=80000000 width=860)
+ Output:["_col0","_col1","_col2","_col3"]
+ Filter Operator [FIL_145] (rows=80000000 width=860)
+ predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_current_hdemo_sk is not null and c_customer_sk is not null)
+ TableScan [TS_0] (rows=80000000 width=860)
+ default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_hdemo_sk","c_current_addr_sk"]
+ <-Map 8 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_150]
+ PartitionCols:_col0
+ Select Operator [SEL_149] (rows=930900 width=385)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_148] (rows=930900 width=385)
+ predicate:((cd_education_status) IN ('Unknown', 'Advanced Degree') and (cd_marital_status) IN ('M', 'W') and (struct(cd_marital_status,cd_education_status)) IN (const struct('M','Unknown'), const struct('W','Advanced Degree')) and cd_demo_sk is not null)
+ TableScan [TS_3] (rows=1861800 width=385)
+ default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"]
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/tez/query98.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query98.q.out b/ql/src/test/results/clientpositive/perf/tez/query98.q.out
index 4915d2b..0260972 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query98.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query98.q.out
@@ -103,9 +103,9 @@ Stage-0
<-Map 9 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_68]
PartitionCols:_col0
- Select Operator [SEL_67] (rows=231000 width=1436)
+ Select Operator [SEL_67] (rows=462000 width=1436)
Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
- Filter Operator [FIL_66] (rows=231000 width=1436)
+ Filter Operator [FIL_66] (rows=462000 width=1436)
predicate:((i_category) IN ('Jewelry', 'Sports', 'Books') and i_item_sk is not null)
TableScan [TS_6] (rows=462000 width=1436)
default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_class","i_category"]
@@ -140,7 +140,7 @@ Stage-0
SHUFFLE [RS_71]
Group By Operator [GBY_70] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_69] (rows=231000 width=1436)
+ Select Operator [SEL_69] (rows=462000 width=1436)
Output:["_col0"]
Please refer to the previous Select Operator [SEL_67]
<-Reducer 8 [BROADCAST_EDGE] vectorized
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/pointlookup.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/pointlookup.q.out b/ql/src/test/results/clientpositive/pointlookup.q.out
index 69ae098..a4ca803 100644
--- a/ql/src/test/results/clientpositive/pointlookup.q.out
+++ b/ql/src/test/results/clientpositive/pointlookup.q.out
@@ -149,14 +149,14 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (struct(key,value)) IN (const struct('0','val_0'), const struct('1','val_1'), const struct('2','val_2'), const struct('3','val_3'), const struct('4','val_4'), const struct('5','val_5'), const struct('6','val_6'), const struct('7','val_7'), const struct('8','val_8'), const struct('9','val_9'), const struct('10','val_10')) (type: boolean)
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -254,14 +254,14 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (struct(key,value)) IN (const struct('0','val_0'), const struct('1','val_1'), const struct('2','val_2'), const struct('3','val_3'), const struct('4','val_4'), const struct('5','val_5'), const struct('6','val_6'), const struct('7','val_7'), const struct('8','val_8'), const struct('9','val_9'), const struct('10','val_10')) (type: boolean)
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/pointlookup2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/pointlookup2.q.out b/ql/src/test/results/clientpositive/pointlookup2.q.out
index 1eba541..b627a56 100644
--- a/ql/src/test/results/clientpositive/pointlookup2.q.out
+++ b/ql/src/test/results/clientpositive/pointlookup2.q.out
@@ -2593,7 +2593,7 @@ STAGE PLANS:
Filter Operator
isSamplingPred: false
predicate: (struct(_col2,_col4)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) (type: boolean)
- Statistics: Num rows: 10 Data size: 270 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 540 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
@@ -2622,7 +2622,7 @@ STAGE PLANS:
key expressions: _col4 (type: int), _col5 (type: string), _col2 (type: string)
null sort order: aaa
sort order: +++
- Statistics: Num rows: 10 Data size: 270 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 540 Basic stats: COMPLETE Column stats: NONE
tag: -1
value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string)
auto parallelism: false
@@ -2659,13 +2659,13 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 10 Data size: 270 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 540 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 10 Data size: 270 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 540 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -2719,15 +2719,15 @@ STAGE PLANS:
Filter Operator
isSamplingPred: false
predicate: (key) IN (1, 2) (type: boolean)
- Statistics: Num rows: 30 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: int), value (type: string), ds (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 30 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
null sort order:
sort order:
- Statistics: Num rows: 30 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE
tag: 0
value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
auto parallelism: false
@@ -2960,11 +2960,11 @@ STAGE PLANS:
0
1
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 30 Data size: 810 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 60 Data size: 1620 Basic stats: COMPLETE Column stats: NONE
Filter Operator
isSamplingPred: false
predicate: (struct(_col0,_col3)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
- Statistics: Num rows: 8 Data size: 216 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 810 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
@@ -2993,7 +2993,7 @@ STAGE PLANS:
key expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string)
null sort order: aaa
sort order: +++
- Statistics: Num rows: 8 Data size: 216 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 810 Basic stats: COMPLETE Column stats: NONE
tag: -1
value expressions: _col2 (type: string), _col4 (type: int), _col5 (type: string)
auto parallelism: false
@@ -3030,13 +3030,13 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 8 Data size: 216 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 810 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 8 Data size: 216 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 810 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/pointlookup3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/pointlookup3.q.out b/ql/src/test/results/clientpositive/pointlookup3.q.out
index 8835d41..855e2da 100644
--- a/ql/src/test/results/clientpositive/pointlookup3.q.out
+++ b/ql/src/test/results/clientpositive/pointlookup3.q.out
@@ -2318,15 +2318,15 @@ STAGE PLANS:
Filter Operator
isSamplingPred: false
predicate: (key) IN (1, 2) (type: boolean)
- Statistics: Num rows: 30 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: int), value (type: string), ds1 (type: string), ds2 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 30 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
null sort order:
sort order:
- Statistics: Num rows: 30 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE
tag: 1
value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string)
auto parallelism: false
@@ -2493,11 +2493,11 @@ STAGE PLANS:
0
1
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
- Statistics: Num rows: 1200 Data size: 20400 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2400 Data size: 40800 Basic stats: COMPLETE Column stats: NONE
Filter Operator
isSamplingPred: false
predicate: (struct(_col2,_col4)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) (type: boolean)
- Statistics: Num rows: 300 Data size: 5100 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1200 Data size: 20400 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
@@ -2526,7 +2526,7 @@ STAGE PLANS:
key expressions: _col4 (type: int), _col5 (type: string), _col2 (type: string)
null sort order: aaa
sort order: +++
- Statistics: Num rows: 300 Data size: 5100 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1200 Data size: 20400 Basic stats: COMPLETE Column stats: NONE
tag: -1
value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string), _col6 (type: string), _col7 (type: string)
auto parallelism: false
@@ -2563,13 +2563,13 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col3 (type: string), VALUE._col4 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
- Statistics: Num rows: 300 Data size: 5100 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1200 Data size: 20400 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 300 Data size: 5100 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1200 Data size: 20400 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/ppd_transform.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/ppd_transform.q.out b/ql/src/test/results/clientpositive/ppd_transform.q.out
index b38088f..6d5da08 100644
--- a/ql/src/test/results/clientpositive/ppd_transform.q.out
+++ b/ql/src/test/results/clientpositive/ppd_transform.q.out
@@ -382,7 +382,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((_col0 = 'a') or (_col0 = 'b')) (type: boolean)
+ predicate: (_col0) IN ('a', 'b') (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
@@ -392,7 +392,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Filter Operator
- predicate: ((_col0 = 'c') or (_col0 = 'd')) (type: boolean)
+ predicate: (_col0) IN ('c', 'd') (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/remove_exprs_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/remove_exprs_stats.q.out b/ql/src/test/results/clientpositive/remove_exprs_stats.q.out
index a9c0051..712fb37 100644
--- a/ql/src/test/results/clientpositive/remove_exprs_stats.q.out
+++ b/ql/src/test/results/clientpositive/remove_exprs_stats.q.out
@@ -460,13 +460,13 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: loc_orc_n0
- filterExpr: (locid) IN (5) (type: boolean)
+ filterExpr: (locid = 5) (type: boolean)
Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (locid) IN (5) (type: boolean)
+ predicate: (locid = 5) (type: boolean)
Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
+ expressions: state (type: string), 5 (type: int), zip (type: bigint), year (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/smb_mapjoin_47.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_47.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_47.q.out
index 894ab3d..9cc2de4 100644
--- a/ql/src/test/results/clientpositive/smb_mapjoin_47.q.out
+++ b/ql/src/test/results/clientpositive/smb_mapjoin_47.q.out
@@ -720,13 +720,13 @@ STAGE PLANS:
1
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
residual filter predicates: {(struct(_col0,_col3)) IN (const struct(100,100), const struct(101,101), const struct(102,102))}
- Statistics: Num rows: 6 Data size: 119 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 18 Data size: 357 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 10
- Statistics: Num rows: 6 Data size: 119 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 6 Data size: 119 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/spark/auto_join19.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/auto_join19.q.out b/ql/src/test/results/clientpositive/spark/auto_join19.q.out
index d7d8cae..07bee85 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join19.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join19.q.out
@@ -56,7 +56,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: src1
- filterExpr: (((ds = '2008-04-08') or (ds = '2008-04-09')) and ((hr = '12') or (hr = '11')) and key is not null) (type: boolean)
+ filterExpr: ((ds) IN ('2008-04-08', '2008-04-09') and (hr) IN ('12', '11') and key is not null) (type: boolean)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_7.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_7.q.out b/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_7.q.out
index e07904a..ad1561f 100644
--- a/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_7.q.out
+++ b/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_7.q.out
@@ -82,10 +82,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- filterExpr: (((key = 0) or (key = 5)) and key is not null) (type: boolean)
+ filterExpr: ((key) IN (0, 5) and (ds = '1')) (type: boolean)
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean)
+ predicate: (key) IN (0, 5) (type: boolean)
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: int), value (type: string)
@@ -236,10 +236,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test_table1_n20
- filterExpr: (((key = 0) or (key = 5)) and key is not null) (type: boolean)
+ filterExpr: ((key) IN (0, 5) and (ds = '1')) (type: boolean)
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean)
+ predicate: (key) IN (0, 5) (type: boolean)
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: int), value (type: string)
@@ -396,15 +396,15 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test_table1_n20
- filterExpr: ((key < 8) and ((key = 0) or (key = 5))) (type: boolean)
+ filterExpr: ((key) IN (0, 5) and (key < 8)) (type: boolean)
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((key = 0) or (key = 5)) and (key < 8)) (type: boolean)
- Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((key < 8) and (key) IN (0, 5)) (type: boolean)
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/spark/cbo_simple_select.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/cbo_simple_select.q.out b/ql/src/test/results/clientpositive/spark/cbo_simple_select.q.out
index a35edb4..e61300b 100644
--- a/ql/src/test/results/clientpositive/spark/cbo_simple_select.q.out
+++ b/ql/src/test/results/clientpositive/spark/cbo_simple_select.q.out
@@ -851,9 +851,9 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: cbo_t2
- filterExpr: (c_int) IN (c_int, (2 * c_int)) (type: boolean)
+ filterExpr: ((c_int = c_int) or (c_int = (2 * c_int))) (type: boolean)
Filter Operator
- predicate: (c_int) IN (c_int, (2 * c_int)) (type: boolean)
+ predicate: ((c_int = (2 * c_int)) or (c_int = c_int)) (type: boolean)
Select Operator
expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
@@ -875,9 +875,9 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: cbo_t2
- filterExpr: (c_int) IN (c_int, 0) (type: boolean)
+ filterExpr: ((c_int = c_int) or (c_int = 0)) (type: boolean)
Filter Operator
- predicate: (c_int) IN (c_int, 0) (type: boolean)
+ predicate: ((c_int = 0) or (c_int = c_int)) (type: boolean)
Select Operator
expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
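Across the hunks above, the post-filter row estimates rise (for example `Num rows: 125` becomes `Num rows: 500`) wherever `Column stats: NONE`. As a rough, hypothetical illustration of the underlying issue — stacking a default selectivity once per related predicate over-shrinks the estimate — consider this sketch. It is not Hive's actual `FilterSelectivityEstimator` logic; the `0.5` default and both functions are assumptions for demonstration only.

```python
# Hypothetical sketch of selectivity estimation with no column stats.
# Not Hive's real estimator; numbers and logic are illustrative only.

DEFAULT_EQ_SELECTIVITY = 0.5  # assumed fallback when column stats are NONE


def naive_estimate(num_rows, num_predicates):
    """Pre-fix style: every conjunct shrinks the estimate independently,
    even when the conjuncts constrain the same column."""
    est = num_rows
    for _ in range(num_predicates):
        est *= DEFAULT_EQ_SELECTIVITY
    return int(est)


def conservative_estimate(num_rows, predicate_columns):
    """Post-fix style: related predicates over the same column are
    counted once, keeping the estimate conservative."""
    est = num_rows
    for _ in set(predicate_columns):
        est *= DEFAULT_EQ_SELECTIVITY
    return int(est)


print(naive_estimate(500, 2))                       # 125
print(conservative_estimate(500, ["key", "key"]))   # 250
```

Two independent halvings of 500 rows yield the old `125` seen in the diffs; deduplicating related predicates avoids compounding the default selectivity, which is the general direction of the estimate changes above.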
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query85.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query85.q.out b/ql/src/test/results/clientpositive/perf/spark/query85.q.out
index 572ba54..d1b3a2c 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query85.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query85.q.out
@@ -166,8 +166,7 @@ limit 100
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-2 is a root stage
- Stage-3 depends on stages: Stage-2
- Stage-1 depends on stages: Stage-3
+ Stage-1 depends on stages: Stage-2
Stage-0 depends on stages: Stage-1
STAGE PLANS:
@@ -175,47 +174,42 @@ STAGE PLANS:
Spark
#### A masked pattern was here ####
Vertices:
- Map 13
+ Map 14
Map Operator Tree:
TableScan
- alias: reason
- filterExpr: r_reason_sk is not null (type: boolean)
- Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE
+ alias: web_page
+ filterExpr: wp_web_page_sk is not null (type: boolean)
+ Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: r_reason_sk is not null (type: boolean)
- Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE
+ predicate: wp_web_page_sk is not null (type: boolean)
+ Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: r_reason_sk (type: int), r_reason_desc (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE
+ expressions: wp_web_page_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
- 0 _col4 (type: int)
+ 0 _col2 (type: int)
1 _col0 (type: int)
Execution mode: vectorized
Local Work:
Map Reduce Local Work
-
- Stage: Stage-3
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 11
+ Map 15
Map Operator Tree:
TableScan
- alias: web_page
- filterExpr: wp_web_page_sk is not null (type: boolean)
- Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE
+ alias: reason
+ filterExpr: r_reason_sk is not null (type: boolean)
+ Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: wp_web_page_sk is not null (type: boolean)
- Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE
+ predicate: r_reason_sk is not null (type: boolean)
+ Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: wp_web_page_sk (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE
+ expressions: r_reason_sk (type: int), r_reason_desc (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
- 0 _col10 (type: int)
+ 0 _col13 (type: int)
1 _col0 (type: int)
Execution mode: vectorized
Local Work:
@@ -224,18 +218,38 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 28), Map 9 (PARTITION-LEVEL SORT, 28)
- Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 98), Reducer 2 (PARTITION-LEVEL SORT, 98)
- Reducer 4 <- Map 12 (PARTITION-LEVEL SORT, 5), Reducer 3 (PARTITION-LEVEL SORT, 5)
- Reducer 5 <- Map 14 (PARTITION-LEVEL SORT, 11), Reducer 4 (PARTITION-LEVEL SORT, 11)
- Reducer 6 <- Map 15 (PARTITION-LEVEL SORT, 7), Reducer 5 (PARTITION-LEVEL SORT, 7)
- Reducer 7 <- Reducer 6 (GROUP, 7)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 52), Map 9 (PARTITION-LEVEL SORT, 52)
+ Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 67), Reducer 2 (PARTITION-LEVEL SORT, 67)
+ Reducer 4 <- Map 11 (PARTITION-LEVEL SORT, 68), Reducer 3 (PARTITION-LEVEL SORT, 68)
+ Reducer 5 <- Map 12 (PARTITION-LEVEL SORT, 12), Reducer 4 (PARTITION-LEVEL SORT, 12)
+ Reducer 6 <- Map 13 (PARTITION-LEVEL SORT, 165), Reducer 5 (PARTITION-LEVEL SORT, 165)
+ Reducer 7 <- Reducer 6 (GROUP, 71)
Reducer 8 <- Reducer 7 (SORT, 1)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
+ alias: web_sales
+ filterExpr: ((ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and ws_item_sk is not null and ws_order_number is not null and ws_web_page_sk is not null and ws_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk is not null and ws_web_page_sk is not null) (type: boolean)
+ Statistics: Num rows: 48000888 Data size: 6526732556 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_web_page_sk (type: int), ws_order_number (type: int), ws_quantity (type: int), ws_sales_price (type: decimal(7,2)), ws_net_profit (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 48000888 Data size: 6526732556 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 48000888 Data size: 6526732556 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
+ Execution mode: vectorized
+ Map 10
+ Map Operator Tree:
+ TableScan
alias: web_returns
filterExpr: (wr_item_sk is not null and wr_order_number is not null and wr_refunded_cdemo_sk is not null and wr_returning_cdemo_sk is not null and wr_refunded_addr_sk is not null and wr_reason_sk is not null) (type: boolean)
Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
@@ -253,53 +267,14 @@ STAGE PLANS:
Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2))
Execution mode: vectorized
- Map 10
- Map Operator Tree:
- TableScan
- alias: customer_address
- filterExpr: ((ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and (ca_country = 'United States') and ca_address_sk is not null) (type: boolean)
- Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null) (type: boolean)
- Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: ca_address_sk (type: int), ca_state (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
- Execution mode: vectorized
- Map 12
- Map Operator Tree:
- TableScan
- alias: date_dim
- filterExpr: ((d_year = 1998) and d_date_sk is not null) (type: boolean)
- Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((d_year = 1998) and d_date_sk is not null) (type: boolean)
- Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: d_date_sk (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
- Execution mode: vectorized
- Map 14
+ Map 11
Map Operator Tree:
TableScan
alias: cd1
- filterExpr: (((cd_education_status = '4 yr Degree') or (cd_education_status = 'Primary') or (cd_education_status = 'Advanced Degree')) and ((cd_marital_status = 'M') or (cd_marital_status = 'D') or (cd_marital_status = 'U')) and cd_demo_sk is not null and cd_marital_status is not null and cd_education_status is not null) (type: boolean)
+ filterExpr: ((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) (type: boolean)
Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((cd_education_status = '4 yr Degree') or (cd_education_status = 'Primary') or (cd_education_status = 'Advanced Degree')) and ((cd_marital_status = 'M') or (cd_marital_status = 'D') or (cd_marital_status = 'U')) and cd_demo_sk is not null and cd_education_status is not null and cd_marital_status is not null) (type: boolean)
+ predicate: ((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) (type: boolean)
Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cd_demo_sk (type: int), cd_marital_status (type: string), cd_education_status (type: string)
@@ -312,14 +287,14 @@ STAGE PLANS:
Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string), _col2 (type: string)
Execution mode: vectorized
- Map 15
+ Map 12
Map Operator Tree:
TableScan
alias: cd2
- filterExpr: (((cd_education_status = '4 yr Degree') or (cd_education_status = 'Primary') or (cd_education_status = 'Advanced Degree')) and ((cd_marital_status = 'M') or (cd_marital_status = 'D') or (cd_marital_status = 'U')) and cd_demo_sk is not null and cd_marital_status is not null and cd_education_status is not null) (type: boolean)
+ filterExpr: ((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) (type: boolean)
Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((cd_education_status = '4 yr Degree') or (cd_education_status = 'Primary') or (cd_education_status = 'Advanced Degree')) and ((cd_marital_status = 'M') or (cd_marital_status = 'D') or (cd_marital_status = 'U')) and cd_demo_sk is not null and cd_education_status is not null and cd_marital_status is not null) (type: boolean)
+ predicate: ((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) (type: boolean)
Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cd_demo_sk (type: int), cd_marital_status (type: string), cd_education_status (type: string)
@@ -331,25 +306,44 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: string)
Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: customer_address
+ filterExpr: ((ca_country = 'United States') and ca_address_sk is not null) (type: boolean)
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((ca_country = 'United States') and ca_address_sk is not null) (type: boolean)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ca_address_sk (type: int), ca_state (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Execution mode: vectorized
Map 9
Map Operator Tree:
TableScan
- alias: web_sales
- filterExpr: ((ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and (ws_net_profit BETWEEN 100 AND 200 or ws_net_profit BETWEEN 150 AND 300 or ws_net_profit BETWEEN 50 AND 250) and ws_item_sk is not null and ws_order_number is not null and ws_web_page_sk is not null and ws_sold_date_sk is not null) (type: boolean)
- Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ alias: date_dim
+ filterExpr: ((d_year = 1998) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((ws_net_profit BETWEEN 100 AND 200 or ws_net_profit BETWEEN 150 AND 300 or ws_net_profit BETWEEN 50 AND 250) and (ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk is not null and ws_web_page_sk is not null) (type: boolean)
- Statistics: Num rows: 16000296 Data size: 2175577518 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((d_year = 1998) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_web_page_sk (type: int), ws_order_number (type: int), ws_quantity (type: int), ws_sales_price (type: decimal(7,2)), ws_net_profit (type: decimal(7,2))
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 16000296 Data size: 2175577518 Basic stats: COMPLETE Column stats: NONE
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col1 (type: int), _col3 (type: int)
- sort order: ++
- Map-reduce partition columns: _col1 (type: int), _col3 (type: int)
- Statistics: Num rows: 16000296 Data size: 2175577518 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Reducer 2
Reduce Operator Tree:
@@ -357,116 +351,114 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: int), _col5 (type: int)
- 1 _col1 (type: int), _col3 (type: int)
- outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col10, _col12, _col13, _col14
- Statistics: Num rows: 17600325 Data size: 2393135321 Basic stats: COMPLETE Column stats: NONE
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 52800977 Data size: 7179405967 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col2 (type: int)
- sort order: +
- Map-reduce partition columns: _col2 (type: int)
- Statistics: Num rows: 17600325 Data size: 2393135321 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: int), _col10 (type: int), _col12 (type: int), _col13 (type: decimal(7,2)), _col14 (type: decimal(7,2))
+ key expressions: _col1 (type: int), _col3 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: int), _col3 (type: int)
+ Statistics: Num rows: 52800977 Data size: 7179405967 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
Reducer 3
- Local Work:
- Map Reduce Local Work
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col2 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col1, _col3, _col4, _col6, _col7, _col8, _col10, _col12, _col13, _col14, _col16
- Statistics: Num rows: 19360357 Data size: 2632448910 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (((_col16) IN ('KY', 'GA', 'NM') and _col14 BETWEEN 100 AND 200) or ((_col16) IN ('MT', 'OR', 'IN') and _col14 BETWEEN 150 AND 300) or ((_col16) IN ('WI', 'MO', 'WV') and _col14 BETWEEN 50 AND 250)) (type: boolean)
- Statistics: Num rows: 3226725 Data size: 438741326 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col10 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col1, _col3, _col4, _col6, _col7, _col8, _col12, _col13
- input vertices:
- 1 Map 11
- Statistics: Num rows: 3549397 Data size: 482615469 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col8 (type: int)
- sort order: +
- Map-reduce partition columns: _col8 (type: int)
- Statistics: Num rows: 3549397 Data size: 482615469 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col12 (type: int), _col13 (type: decimal(7,2))
+ 0 _col1 (type: int), _col3 (type: int)
+ 1 _col0 (type: int), _col5 (type: int)
+ outputColumnNames: _col2, _col4, _col5, _col6, _col10, _col11, _col12, _col13, _col15, _col16
+ Statistics: Num rows: 58081075 Data size: 7897346734 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col10 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col10 (type: int)
+ Statistics: Num rows: 58081075 Data size: 7897346734 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col11 (type: int), _col12 (type: int), _col13 (type: int), _col15 (type: decimal(7,2)), _col16 (type: decimal(7,2))
Reducer 4
- Local Work:
- Map Reduce Local Work
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col8 (type: int)
+ 0 _col10 (type: int)
1 _col0 (type: int)
- outputColumnNames: _col1, _col3, _col4, _col6, _col7, _col12, _col13
- Statistics: Num rows: 3904336 Data size: 530877027 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col4 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col1, _col3, _col6, _col7, _col12, _col13, _col22
- input vertices:
- 1 Map 13
- Statistics: Num rows: 4294769 Data size: 583964742 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col2, _col4, _col5, _col6, _col11, _col12, _col13, _col15, _col16, _col18, _col19
+ Statistics: Num rows: 63889183 Data size: 8687081595 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((_col18 = 'D') and (_col19 = 'Primary') and _col5 BETWEEN 50 AND 100) or ((_col18 = 'M') and (_col19 = '4 yr Degree') and _col5 BETWEEN 100 AND 150) or ((_col18 = 'U') and (_col19 = 'Advanced Degree') and _col5 BETWEEN 150 AND 200)) (type: boolean)
+ Statistics: Num rows: 5324097 Data size: 723923250 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col1 (type: int)
- sort order: +
- Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 4294769 Data size: 583964742 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col3 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col12 (type: int), _col13 (type: decimal(7,2)), _col22 (type: string)
+ key expressions: _col12 (type: int), _col18 (type: string), _col19 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col12 (type: int), _col18 (type: string), _col19 (type: string)
+ Statistics: Num rows: 5324097 Data size: 723923250 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)), _col11 (type: int), _col13 (type: int), _col15 (type: decimal(7,2)), _col16 (type: decimal(7,2))
Reducer 5
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col1 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col3, _col6, _col7, _col12, _col13, _col22, _col24, _col25
- Statistics: Num rows: 4724246 Data size: 642361230 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (((_col24 = 'D') and (_col25 = 'Primary') and _col13 BETWEEN 50 AND 100) or ((_col24 = 'M') and (_col25 = '4 yr Degree') and _col13 BETWEEN 100 AND 150) or ((_col24 = 'U') and (_col25 = 'Advanced Degree') and _col13 BETWEEN 150 AND 200)) (type: boolean)
- Statistics: Num rows: 393687 Data size: 53530079 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col3 (type: int), _col24 (type: string), _col25 (type: string)
- sort order: +++
- Map-reduce partition columns: _col3 (type: int), _col24 (type: string), _col25 (type: string)
- Statistics: Num rows: 393687 Data size: 53530079 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col12 (type: int), _col22 (type: string)
+ 0 _col12 (type: int), _col18 (type: string), _col19 (type: string)
+ 1 _col0 (type: int), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: _col2, _col4, _col6, _col11, _col13, _col15, _col16
+ Statistics: Num rows: 5856506 Data size: 796315592 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col11 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col11 (type: int)
+ Statistics: Num rows: 5856506 Data size: 796315592 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)), _col13 (type: int), _col15 (type: decimal(7,2)), _col16 (type: decimal(7,2))
Reducer 6
+ Local Work:
+ Map Reduce Local Work
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col3 (type: int), _col24 (type: string), _col25 (type: string)
- 1 _col0 (type: int), _col1 (type: string), _col2 (type: string)
- outputColumnNames: _col6, _col7, _col12, _col22
- Statistics: Num rows: 2047980 Data size: 788904791 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: sum(_col12), count(_col12), sum(_col7), count(_col7), sum(_col6), count(_col6)
- keys: _col22 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 2047980 Data size: 788904791 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 2047980 Data size: 788904791 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: bigint), _col5 (type: decimal(17,2)), _col6 (type: bigint)
+ 0 _col11 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col4, _col6, _col13, _col15, _col16, _col24
+ Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((((_col24 = 'KY') or (_col24 = 'GA') or (_col24 = 'NM')) and _col6 BETWEEN 100 AND 200) or (((_col24 = 'MT') or (_col24 = 'OR') or (_col24 = 'IN')) and _col6 BETWEEN 150 AND 300) or (((_col24 = 'WI') or (_col24 = 'MO') or (_col24 = 'WV')) and _col6 BETWEEN 50 AND 250)) (type: boolean)
+ Statistics: Num rows: 7333332 Data size: 7442451276 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col4, _col13, _col15, _col16
+ input vertices:
+ 1 Map 14
+ Statistics: Num rows: 8066665 Data size: 8186696581 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col13 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col4, _col15, _col16, _col28
+ input vertices:
+ 1 Map 15
+ Statistics: Num rows: 8873331 Data size: 9005366434 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col4), count(_col4), sum(_col16), count(_col16), sum(_col15), count(_col15)
+ keys: _col28 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 8873331 Data size: 9005366434 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 8873331 Data size: 9005366434 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: bigint), _col5 (type: decimal(17,2)), _col6 (type: bigint)
Reducer 7
Execution mode: vectorized
Reduce Operator Tree:
@@ -475,15 +467,15 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 1023990 Data size: 394452395 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4436665 Data size: 4502682709 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: (_col1 / _col2) (type: double), (_col3 / _col4) (type: decimal(37,22)), (_col5 / _col6) (type: decimal(37,22)), substr(_col0, 1, 20) (type: string)
outputColumnNames: _col4, _col5, _col6, _col7
- Statistics: Num rows: 1023990 Data size: 394452395 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4436665 Data size: 4502682709 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col7 (type: string), _col4 (type: double), _col5 (type: decimal(37,22)), _col6 (type: decimal(37,22))
sort order: ++++
- Statistics: Num rows: 1023990 Data size: 394452395 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4436665 Data size: 4502682709 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
Reducer 8
Execution mode: vectorized
@@ -491,13 +483,13 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: decimal(37,22)), KEY.reducesinkkey3 (type: decimal(37,22))
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1023990 Data size: 394452395 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4436665 Data size: 4502682709 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 100
- Statistics: Num rows: 100 Data size: 38500 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 100 Data size: 101400 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 100 Data size: 38500 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 100 Data size: 101400 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query89.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query89.q.out b/ql/src/test/results/clientpositive/perf/spark/query89.q.out
index 1acc577..203a141 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query89.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query89.q.out
@@ -86,8 +86,8 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 400), Map 7 (PARTITION-LEVEL SORT, 400)
- Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 438), Reducer 2 (PARTITION-LEVEL SORT, 438)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398)
+ Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 442), Reducer 2 (PARTITION-LEVEL SORT, 442)
Reducer 4 <- Reducer 3 (GROUP, 529)
Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 265)
Reducer 6 <- Reducer 5 (SORT, 1)
@@ -107,33 +107,13 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col1 (type: int)
- sort order: +
- Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
- Execution mode: vectorized
- Map 7
- Map Operator Tree:
- TableScan
- alias: item
- filterExpr: (((i_class) IN ('wallpaper', 'parenting', 'musical') or (i_class) IN ('womens', 'birdal', 'pants')) and ((i_category) IN ('Home', 'Books', 'Electronics') or (i_category) IN ('Shoes', 'Jewelry', 'Men')) and (((i_category) IN ('Home', 'Books', 'Electronics') and (i_class) IN ('wallpaper', 'parenting', 'musical')) or ((i_category) IN ('Shoes', 'Jewelry', 'Men') and (i_class) IN ('womens', 'birdal', 'pants'))) and i_item_sk is not null) (type: boolean)
- Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((((i_category) IN ('Home', 'Books', 'Electronics') and (i_class) IN ('wallpaper', 'parenting', 'musical')) or ((i_category) IN ('Shoes', 'Jewelry', 'Men') and (i_class) IN ('womens', 'birdal', 'pants'))) and ((i_category) IN ('Home', 'Books', 'Electronics') or (i_category) IN ('Shoes', 'Jewelry', 'Men')) and ((i_class) IN ('wallpaper', 'parenting', 'musical') or (i_class) IN ('womens', 'birdal', 'pants')) and i_item_sk is not null) (type: boolean)
- Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: i_item_sk (type: int), i_brand (type: string), i_class (type: string), i_category (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
Execution mode: vectorized
- Map 8
+ Map 7
Map Operator Tree:
TableScan
alias: date_dim
@@ -153,22 +133,42 @@ STAGE PLANS:
Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: int)
Execution mode: vectorized
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: item
+ filterExpr: ((((i_category) IN ('Home', 'Books', 'Electronics') and (i_class) IN ('wallpaper', 'parenting', 'musical')) or ((i_category) IN ('Shoes', 'Jewelry', 'Men') and (i_class) IN ('womens', 'birdal', 'pants'))) and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((((i_category) IN ('Home', 'Books', 'Electronics') and (i_class) IN ('wallpaper', 'parenting', 'musical')) or ((i_category) IN ('Shoes', 'Jewelry', 'Men') and (i_class) IN ('womens', 'birdal', 'pants'))) and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: int), i_brand (type: string), i_class (type: string), i_category (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ Execution mode: vectorized
Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col1 (type: int)
+ 0 _col0 (type: int)
1 _col0 (type: int)
- outputColumnNames: _col0, _col2, _col3, _col5, _col6, _col7
+ outputColumnNames: _col1, _col2, _col3, _col6
Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: int)
+ key expressions: _col1 (type: int)
sort order: +
- Map-reduce partition columns: _col0 (type: int)
+ Map-reduce partition columns: _col1 (type: int)
Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: int), _col3 (type: decimal(7,2)), _col5 (type: string), _col6 (type: string), _col7 (type: string)
+ value expressions: _col2 (type: int), _col3 (type: decimal(7,2)), _col6 (type: int)
Reducer 3
Local Work:
Map Reduce Local Work
@@ -177,9 +177,9 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: int)
+ 0 _col1 (type: int)
1 _col0 (type: int)
- outputColumnNames: _col2, _col3, _col5, _col6, _col7, _col10
+ outputColumnNames: _col2, _col3, _col6, _col8, _col9, _col10
Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
@@ -187,20 +187,20 @@ STAGE PLANS:
keys:
0 _col2 (type: int)
1 _col0 (type: int)
- outputColumnNames: _col3, _col5, _col6, _col7, _col10, _col12, _col13
+ outputColumnNames: _col3, _col6, _col8, _col9, _col10, _col12, _col13
input vertices:
1 Map 9
Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_col3)
- keys: _col5 (type: string), _col6 (type: string), _col7 (type: string), _col10 (type: int), _col12 (type: string), _col13 (type: string)
+ keys: _col6 (type: int), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col12 (type: string), _col13 (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: string), _col5 (type: string)
+ key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
sort order: ++++++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: string), _col5 (type: string)
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
value expressions: _col6 (type: decimal(17,2))
Reducer 4
@@ -208,33 +208,33 @@ STAGE PLANS:
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0)
- keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int), KEY._col4 (type: string), KEY._col5 (type: string)
+ keys: KEY._col0 (type: int), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col2 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string)
+ key expressions: _col3 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string)
sort order: ++++
- Map-reduce partition columns: _col2 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string)
+ Map-reduce partition columns: _col3 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string)
Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string), _col3 (type: int), _col6 (type: decimal(17,2))
+ value expressions: _col0 (type: int), _col2 (type: string), _col6 (type: decimal(17,2))
Reducer 5
Reduce Operator Tree:
Select Operator
- expressions: KEY.reducesinkkey1 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col2 (type: decimal(17,2))
+ expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col2 (type: decimal(17,2))
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
- output shape: _col0: string, _col1: string, _col2: string, _col3: int, _col4: string, _col5: string, _col6: decimal(17,2)
+ output shape: _col0: int, _col1: string, _col2: string, _col3: string, _col4: string, _col5: string, _col6: decimal(17,2)
type: WINDOWING
Windowing table definition
input alias: ptf_1
name: windowingtablefunction
- order by: _col2 ASC NULLS FIRST, _col0 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST
- partition by: _col2, _col0, _col4, _col5
+ order by: _col3 ASC NULLS FIRST, _col1 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST
+ partition by: _col3, _col1, _col4, _col5
raw input shape:
window functions:
window function definition
@@ -245,14 +245,14 @@ STAGE PLANS:
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(17,2))
+ expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(17,2))
outputColumnNames: avg_window_0, _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: CASE WHEN ((avg_window_0 <> 0)) THEN (((abs((_col6 - avg_window_0)) / avg_window_0) > 0.1)) ELSE (null) END (type: boolean)
Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string), _col3 (type: int), _col6 (type: decimal(17,2)), avg_window_0 (type: decimal(21,6)), (_col6 - avg_window_0) (type: decimal(22,6))
+ expressions: _col3 (type: string), _col2 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string), _col0 (type: int), _col6 (type: decimal(17,2)), avg_window_0 (type: decimal(21,6)), (_col6 - avg_window_0) (type: decimal(22,6))
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query91.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query91.q.out b/ql/src/test/results/clientpositive/perf/spark/query91.q.out
index de8977d..78f85ac 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query91.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query91.q.out
@@ -69,19 +69,19 @@ STAGE PLANS:
Spark
#### A masked pattern was here ####
Vertices:
- Map 7
+ Map 13
Map Operator Tree:
TableScan
- alias: call_center
- filterExpr: cc_call_center_sk is not null (type: boolean)
- Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE
+ alias: household_demographics
+ filterExpr: ((hd_buy_potential like '0-500%') and hd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: cc_call_center_sk is not null (type: boolean)
- Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((hd_buy_potential like '0-500%') and hd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: cc_call_center_sk (type: int), cc_call_center_id (type: string), cc_name (type: string), cc_manager (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE
+ expressions: hd_demo_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
0 _col2 (type: int)
@@ -94,19 +94,19 @@ STAGE PLANS:
Spark
#### A masked pattern was here ####
Vertices:
- Map 13
+ Map 12
Map Operator Tree:
TableScan
- alias: household_demographics
- filterExpr: ((hd_buy_potential like '0-500%') and hd_demo_sk is not null) (type: boolean)
- Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+ alias: call_center
+ filterExpr: cc_call_center_sk is not null (type: boolean)
+ Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((hd_buy_potential like '0-500%') and hd_demo_sk is not null) (type: boolean)
- Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+ predicate: cc_call_center_sk is not null (type: boolean)
+ Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: hd_demo_sk (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+ expressions: cc_call_center_sk (type: int), cc_call_center_id (type: string), cc_name (type: string), cc_manager (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
0 _col2 (type: int)
@@ -118,42 +118,61 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 10 <- Map 12 (PARTITION-LEVEL SORT, 750), Reducer 9 (PARTITION-LEVEL SORT, 750)
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 25), Map 6 (PARTITION-LEVEL SORT, 25)
- Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 745), Reducer 2 (PARTITION-LEVEL SORT, 745)
- Reducer 4 <- Reducer 3 (GROUP, 787)
- Reducer 5 <- Reducer 4 (SORT, 1)
- Reducer 9 <- Map 11 (PARTITION-LEVEL SORT, 541), Map 8 (PARTITION-LEVEL SORT, 541)
+ Reducer 10 <- Map 11 (PARTITION-LEVEL SORT, 25), Map 9 (PARTITION-LEVEL SORT, 25)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 541), Map 7 (PARTITION-LEVEL SORT, 541)
+ Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 750), Reducer 2 (PARTITION-LEVEL SORT, 750)
+ Reducer 4 <- Reducer 10 (PARTITION-LEVEL SORT, 680), Reducer 3 (PARTITION-LEVEL SORT, 680)
+ Reducer 5 <- Reducer 4 (GROUP, 787)
+ Reducer 6 <- Reducer 5 (SORT, 1)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: catalog_returns
- filterExpr: (cr_call_center_sk is not null and cr_returned_date_sk is not null and cr_returning_customer_sk is not null) (type: boolean)
- Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ alias: customer
+ filterExpr: (c_customer_sk is not null and c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_current_hdemo_sk is not null) (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (cr_call_center_sk is not null and cr_returned_date_sk is not null and cr_returning_customer_sk is not null) (type: boolean)
- Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ predicate: (c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_current_hdemo_sk is not null and c_customer_sk is not null) (type: boolean)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: cr_returned_date_sk (type: int), cr_returning_customer_sk (type: int), cr_call_center_sk (type: int), cr_net_loss (type: decimal(7,2))
+ expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_hdemo_sk (type: int), c_current_addr_sk (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int)
+ Execution mode: vectorized
+ Map 11
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ filterExpr: ((d_year = 1999) and (d_moy = 11) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_moy = 11) and (d_year = 1999) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
+ Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
- Map 11
+ Map 7
Map Operator Tree:
TableScan
alias: customer_demographics
- filterExpr: (((cd_education_status = 'Unknown') or (cd_education_status = 'Advanced Degree')) and ((cd_marital_status = 'M') or (cd_marital_status = 'W')) and (((cd_marital_status = 'M') and (cd_education_status = 'Unknown')) or ((cd_marital_status = 'W') and (cd_education_status = 'Advanced Degree'))) and cd_demo_sk is not null) (type: boolean)
+ filterExpr: ((cd_education_status) IN ('Unknown', 'Advanced Degree') and (cd_marital_status) IN ('M', 'W') and (struct(cd_marital_status,cd_education_status)) IN (const struct('M','Unknown'), const struct('W','Advanced Degree')) and cd_demo_sk is not null) (type: boolean)
Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((((cd_marital_status = 'M') and (cd_education_status = 'Unknown')) or ((cd_marital_status = 'W') and (cd_education_status = 'Advanced Degree'))) and ((cd_education_status = 'Unknown') or (cd_education_status = 'Advanced Degree')) and ((cd_marital_status = 'M') or (cd_marital_status = 'W')) and cd_demo_sk is not null) (type: boolean)
+ predicate: ((cd_education_status) IN ('Unknown', 'Advanced Degree') and (cd_marital_status) IN ('M', 'W') and (struct(cd_marital_status,cd_education_status)) IN (const struct('M','Unknown'), const struct('W','Advanced Degree')) and cd_demo_sk is not null) (type: boolean)
Statistics: Num rows: 930900 Data size: 358593079 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cd_demo_sk (type: int), cd_marital_status (type: string), cd_education_status (type: string)
@@ -166,7 +185,7 @@ STAGE PLANS:
Statistics: Num rows: 930900 Data size: 358593079 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string), _col2 (type: string)
Execution mode: vectorized
- Map 12
+ Map 8
Map Operator Tree:
TableScan
alias: customer_address
@@ -185,44 +204,25 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
- Map 6
+ Map 9
Map Operator Tree:
TableScan
- alias: date_dim
- filterExpr: ((d_year = 1999) and (d_moy = 11) and d_date_sk is not null) (type: boolean)
- Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ alias: catalog_returns
+ filterExpr: (cr_call_center_sk is not null and cr_returned_date_sk is not null and cr_returning_customer_sk is not null) (type: boolean)
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((d_moy = 11) and (d_year = 1999) and d_date_sk is not null) (type: boolean)
- Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ predicate: (cr_call_center_sk is not null and cr_returned_date_sk is not null and cr_returning_customer_sk is not null) (type: boolean)
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: d_date_sk (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ expressions: cr_returned_date_sk (type: int), cr_returning_customer_sk (type: int), cr_call_center_sk (type: int), cr_net_loss (type: decimal(7,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
- Execution mode: vectorized
- Map 8
- Map Operator Tree:
- TableScan
- alias: customer
- filterExpr: (c_customer_sk is not null and c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_current_hdemo_sk is not null) (type: boolean)
- Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_current_hdemo_sk is not null and c_customer_sk is not null) (type: boolean)
- Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_hdemo_sk (type: int), c_current_addr_sk (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: int)
- sort order: +
- Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int)
+ Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
Execution mode: vectorized
Reducer 10
Local Work:
@@ -232,38 +232,6 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 _col3 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col2, _col5, _col6
- Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col2 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col5, _col6
- input vertices:
- 1 Map 13
- Statistics: Num rows: 106480005 Data size: 91574956652 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int), _col5 (type: string), _col6 (type: string)
- outputColumnNames: _col2, _col7, _col8
- Statistics: Num rows: 106480005 Data size: 91574956652 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col2 (type: int)
- sort order: +
- Map-reduce partition columns: _col2 (type: int)
- Statistics: Num rows: 106480005 Data size: 91574956652 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col7 (type: string), _col8 (type: string)
- Reducer 2
- Local Work:
- Map Reduce Local Work
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col1, _col2, _col3
@@ -276,7 +244,7 @@ STAGE PLANS:
1 _col0 (type: int)
outputColumnNames: _col1, _col3, _col8, _col9, _col10
input vertices:
- 1 Map 7
+ 1 Map 12
Statistics: Num rows: 34846646 Data size: 3699254122 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col1 (type: int)
@@ -284,29 +252,73 @@ STAGE PLANS:
Map-reduce partition columns: _col1 (type: int)
Statistics: Num rows: 34846646 Data size: 3699254122 Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: decimal(7,2)), _col8 (type: string), _col9 (type: string), _col10 (type: string)
- Reducer 3
+ Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col1 (type: int)
- 1 _col2 (type: int)
- outputColumnNames: _col3, _col8, _col9, _col10, _col18, _col19
- Statistics: Num rows: 117128008 Data size: 100732454500 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: sum(_col3)
- keys: _col8 (type: string), _col9 (type: string), _col10 (type: string), _col18 (type: string), _col19 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col2, _col3, _col5, _col6
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col3 (type: int)
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: int), _col5 (type: string), _col6 (type: string)
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col2, _col5, _col6
+ Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col5 (type: string), _col6 (type: string)
+ Reducer 4
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col1 (type: int)
+ outputColumnNames: _col2, _col5, _col6, _col12, _col17, _col18, _col19
+ Statistics: Num rows: 106480005 Data size: 91574956652 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col5, _col6, _col12, _col17, _col18, _col19
+ input vertices:
+ 1 Map 13
Statistics: Num rows: 117128008 Data size: 100732454500 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string)
- sort order: +++++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string)
+ Group By Operator
+ aggregations: sum(_col12)
+ keys: _col5 (type: string), _col6 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 117128008 Data size: 100732454500 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col5 (type: decimal(17,2))
- Reducer 4
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string)
+ sort order: +++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string)
+ Statistics: Num rows: 117128008 Data size: 100732454500 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col5 (type: decimal(17,2))
+ Reducer 5
Execution mode: vectorized
Reduce Operator Tree:
Group By Operator
@@ -316,7 +328,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 58564004 Data size: 50366227250 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: decimal(17,2))
+ expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: decimal(17,2))
outputColumnNames: _col0, _col1, _col2, _col4
Statistics: Num rows: 58564004 Data size: 50366227250 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
@@ -324,7 +336,7 @@ STAGE PLANS:
sort order: -
Statistics: Num rows: 58564004 Data size: 50366227250 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
- Reducer 5
+ Reducer 6
Execution mode: vectorized
Reduce Operator Tree:
Select Operator
@@ -338,22 +350,6 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 9
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col1 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col2, _col3, _col5, _col6
- Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col3 (type: int)
- sort order: +
- Map-reduce partition columns: _col3 (type: int)
- Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col2 (type: int), _col5 (type: string), _col6 (type: string)
Stage: Stage-0
Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query98.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query98.q.out b/ql/src/test/results/clientpositive/perf/spark/query98.q.out
index c82607d..f1d7caa 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query98.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query98.q.out
@@ -94,7 +94,7 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 440), Map 7 (PARTITION-LEVEL SORT, 440)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 442), Map 7 (PARTITION-LEVEL SORT, 442)
Reducer 3 <- Reducer 2 (GROUP, 481)
Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 241)
Reducer 5 <- Reducer 4 (SORT, 1)
@@ -140,16 +140,16 @@ STAGE PLANS:
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((i_category) IN ('Jewelry', 'Sports', 'Books') and i_item_sk is not null) (type: boolean)
- Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: i_item_sk (type: int), i_item_id (type: string), i_item_desc (type: string), i_current_price (type: decimal(7,2)), i_class (type: string), i_category (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: decimal(7,2)), _col4 (type: string), _col5 (type: string)
Execution mode: vectorized
Reducer 2
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/tez/query10.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query10.q.out b/ql/src/test/results/clientpositive/perf/tez/query10.q.out
index a8f097f..5b55d44 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query10.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query10.q.out
@@ -195,9 +195,9 @@ Stage-0
<-Map 8 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_179]
PartitionCols:_col0
- Select Operator [SEL_178] (rows=20000000 width=1014)
+ Select Operator [SEL_178] (rows=40000000 width=1014)
Output:["_col0"]
- Filter Operator [FIL_177] (rows=20000000 width=1014)
+ Filter Operator [FIL_177] (rows=40000000 width=1014)
predicate:((ca_county) IN ('Walker County', 'Richland County', 'Gaines County', 'Douglas County', 'Dona Ana County') and ca_address_sk is not null)
TableScan [TS_3] (rows=40000000 width=1014)
default@customer_address,ca,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_county"]
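The row-count changes in the plan diffs above (e.g. `rows=20000000` becoming `rows=40000000` under `Column stats: NONE`) all follow one pattern. A hedged sketch of that estimate, where the `ndv` parameter and the no-stats behavior are assumptions inferred from these .q.out diffs rather than the actual StatsRulesProcFactory code:

```python
def in_filter_row_estimate(num_rows, values, ndv=None):
    # Hedged sketch of the estimate visible in the plan diffs: with
    # column stats (distinct-value count known) selectivity is
    # len(values)/ndv; with "Column stats: NONE" the row count is now
    # left unchanged instead of being halved for the IN predicate.
    if ndv is None:
        return num_rows
    return int(num_rows * min(1.0, len(values) / ndv))
```

With stats absent, `in_filter_row_estimate(40000000, counties)` stays at 40000000, matching the `FIL_177` change above.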
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/tez/query12.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query12.q.out b/ql/src/test/results/clientpositive/perf/tez/query12.q.out
index d3d8df0..151bebf 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query12.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query12.q.out
@@ -107,9 +107,9 @@ Stage-0
<-Map 9 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_69]
PartitionCols:_col0
- Select Operator [SEL_68] (rows=231000 width=1436)
+ Select Operator [SEL_68] (rows=462000 width=1436)
Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
- Filter Operator [FIL_67] (rows=231000 width=1436)
+ Filter Operator [FIL_67] (rows=462000 width=1436)
predicate:((i_category) IN ('Jewelry', 'Sports', 'Books') and i_item_sk is not null)
TableScan [TS_6] (rows=462000 width=1436)
default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_class","i_category"]
@@ -144,7 +144,7 @@ Stage-0
SHUFFLE [RS_72]
Group By Operator [GBY_71] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_70] (rows=231000 width=1436)
+ Select Operator [SEL_70] (rows=462000 width=1436)
Output:["_col0"]
Please refer to the previous Select Operator [SEL_68]
<-Reducer 8 [BROADCAST_EDGE] vectorized
HIVE-19097 : related equals and in operators may cause inaccurate stats estimations (Zoltan Haindrich via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/20c95c1c
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/20c95c1c
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/20c95c1c
Branch: refs/heads/master
Commit: 20c95c1c00679193cfe639cab26fc8309d72895b
Parents: 27bdbda
Author: Zoltan Haindrich <ki...@rxd.hu>
Authored: Tue Apr 3 08:14:00 2018 -0700
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Sun Aug 5 22:31:16 2018 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hive/conf/HiveConf.java | 2 +-
.../ql/exec/vector/VectorizationContext.java | 4 +
.../rules/HivePointLookupOptimizerRule.java | 2 +-
.../calcite/translator/RexNodeConverter.java | 39 +-
.../stats/annotation/StatsRulesProcFactory.java | 18 +-
.../hive/ql/parse/TypeCheckProcFactory.java | 155 +++++---
.../ql/plan/mapping/TestCounterMapping.java | 49 ++-
.../test/queries/clientpositive/pointlookup.q | 5 +-
.../test/queries/clientpositive/pointlookup2.q | 4 +-
.../test/queries/clientpositive/pointlookup3.q | 1 +
.../alter_partition_coltype.q.out | 5 +-
.../clientpositive/annotate_stats_filter.q.out | 10 +-
.../clientpositive/annotate_stats_part.q.out | 2 +-
.../results/clientpositive/auto_join19.q.out | 2 +-
.../clientpositive/cbo_rp_simple_select.q.out | 22 +-
.../clientpositive/cbo_simple_select.q.out | 22 +-
.../clientpositive/druid_intervals.q.out | 2 +-
.../dynamic_partition_skip_default.q.out | 5 +-
.../clientpositive/filter_cond_pushdown.q.out | 54 +--
.../clientpositive/filter_in_or_dup.q.out | 18 +-
.../clientpositive/fold_eq_with_case_when.q.out | 4 +-
.../groupby_multi_single_reducer3.q.out | 52 +--
.../implicit_cast_during_insert.q.out | 18 +-
ql/src/test/results/clientpositive/join45.q.out | 2 +-
ql/src/test/results/clientpositive/join47.q.out | 2 +-
.../list_bucket_query_multiskew_2.q.out | 57 ++-
.../clientpositive/llap/bucketpruning1.q.out | 24 +-
.../llap/bucketsortoptimize_insert_7.q.out | 84 ++---
.../clientpositive/llap/cbo_simple_select.q.out | 8 +-
.../clientpositive/llap/check_constraint.q.out | 4 +-
.../llap/dynamic_partition_pruning.q.out | 4 +-
.../llap/enforce_constraint_notnull.q.out | 4 +-
.../clientpositive/llap/explainuser_1.q.out | 2 +-
.../clientpositive/llap/explainuser_2.q.out | 12 +-
.../test/results/clientpositive/llap/kryo.q.out | 4 +-
.../llap/llap_decimal64_reader.q.out | 8 +-
.../llap/materialized_view_rewrite_ssb.q.out | 20 +-
.../llap/materialized_view_rewrite_ssb_2.q.out | 20 +-
.../clientpositive/llap/orc_llap_counters.q.out | 10 +-
.../clientpositive/llap/vector_between_in.q.out | 38 +-
.../llap/vector_string_decimal.q.out | 6 +-
.../clientpositive/llap/vector_struct_in.q.out | 6 +-
.../vector_windowing_multipartitioning.q.out | 30 +-
.../llap/vector_windowing_navfn.q.out | 6 +-
.../clientpositive/llap/vectorized_case.q.out | 24 +-
.../vectorized_dynamic_partition_pruning.q.out | 4 +-
.../llap/vectorized_timestamp.q.out | 6 +-
.../test/results/clientpositive/mapjoin47.q.out | 2 +-
.../parquet_vectorization_0.q.out | 22 +-
ql/src/test/results/clientpositive/pcr.q.out | 38 +-
ql/src/test/results/clientpositive/pcs.q.out | 6 +-
.../clientpositive/perf/spark/query10.q.out | 8 +-
.../clientpositive/perf/spark/query12.q.out | 8 +-
.../clientpositive/perf/spark/query13.q.out | 226 ++++++------
.../clientpositive/perf/spark/query15.q.out | 2 +-
.../clientpositive/perf/spark/query16.q.out | 4 +-
.../clientpositive/perf/spark/query17.q.out | 12 +-
.../clientpositive/perf/spark/query18.q.out | 26 +-
.../clientpositive/perf/spark/query20.q.out | 8 +-
.../clientpositive/perf/spark/query23.q.out | 30 +-
.../clientpositive/perf/spark/query27.q.out | 4 +-
.../clientpositive/perf/spark/query29.q.out | 6 +-
.../clientpositive/perf/spark/query34.q.out | 14 +-
.../clientpositive/perf/spark/query36.q.out | 4 +-
.../clientpositive/perf/spark/query37.q.out | 6 +-
.../clientpositive/perf/spark/query45.q.out | 22 +-
.../clientpositive/perf/spark/query46.q.out | 10 +-
.../clientpositive/perf/spark/query48.q.out | 128 ++++---
.../clientpositive/perf/spark/query53.q.out | 18 +-
.../clientpositive/perf/spark/query56.q.out | 24 +-
.../clientpositive/perf/spark/query63.q.out | 18 +-
.../clientpositive/perf/spark/query68.q.out | 140 ++++----
.../clientpositive/perf/spark/query69.q.out | 8 +-
.../clientpositive/perf/spark/query71.q.out | 4 +-
.../clientpositive/perf/spark/query73.q.out | 150 ++++----
.../clientpositive/perf/spark/query74.q.out | 24 +-
.../clientpositive/perf/spark/query79.q.out | 6 +-
.../clientpositive/perf/spark/query82.q.out | 8 +-
.../clientpositive/perf/spark/query83.q.out | 24 +-
.../clientpositive/perf/spark/query85.q.out | 352 +++++++++----------
.../clientpositive/perf/spark/query89.q.out | 92 ++---
.../clientpositive/perf/spark/query91.q.out | 262 +++++++-------
.../clientpositive/perf/spark/query98.q.out | 8 +-
.../clientpositive/perf/tez/query10.q.out | 4 +-
.../clientpositive/perf/tez/query12.q.out | 6 +-
.../clientpositive/perf/tez/query13.q.out | 280 ++++++++-------
.../clientpositive/perf/tez/query15.q.out | 2 +-
.../clientpositive/perf/tez/query16.q.out | 6 +-
.../clientpositive/perf/tez/query17.q.out | 10 +-
.../clientpositive/perf/tez/query18.q.out | 154 ++++----
.../clientpositive/perf/tez/query20.q.out | 6 +-
.../clientpositive/perf/tez/query23.q.out | 14 +-
.../clientpositive/perf/tez/query27.q.out | 6 +-
.../clientpositive/perf/tez/query29.q.out | 6 +-
.../clientpositive/perf/tez/query34.q.out | 14 +-
.../clientpositive/perf/tez/query36.q.out | 6 +-
.../clientpositive/perf/tez/query37.q.out | 6 +-
.../clientpositive/perf/tez/query45.q.out | 14 +-
.../clientpositive/perf/tez/query46.q.out | 12 +-
.../clientpositive/perf/tez/query48.q.out | 226 ++++++------
.../clientpositive/perf/tez/query53.q.out | 14 +-
.../clientpositive/perf/tez/query56.q.out | 8 +-
.../clientpositive/perf/tez/query63.q.out | 14 +-
.../clientpositive/perf/tez/query64.q.out | 8 +-
.../clientpositive/perf/tez/query68.q.out | 12 +-
.../clientpositive/perf/tez/query69.q.out | 4 +-
.../clientpositive/perf/tez/query71.q.out | 2 +-
.../clientpositive/perf/tez/query73.q.out | 14 +-
.../clientpositive/perf/tez/query74.q.out | 20 +-
.../clientpositive/perf/tez/query79.q.out | 6 +-
.../clientpositive/perf/tez/query82.q.out | 6 +-
.../clientpositive/perf/tez/query83.q.out | 6 +-
.../clientpositive/perf/tez/query85.q.out | 238 ++++++-------
.../clientpositive/perf/tez/query89.q.out | 46 +--
.../clientpositive/perf/tez/query91.q.out | 190 +++++-----
.../clientpositive/perf/tez/query98.q.out | 6 +-
.../results/clientpositive/pointlookup.q.out | 12 +-
.../results/clientpositive/pointlookup2.q.out | 24 +-
.../results/clientpositive/pointlookup3.q.out | 16 +-
.../results/clientpositive/ppd_transform.q.out | 4 +-
.../clientpositive/remove_exprs_stats.q.out | 6 +-
.../results/clientpositive/smb_mapjoin_47.q.out | 6 +-
.../clientpositive/spark/auto_join19.q.out | 2 +-
.../spark/bucketsortoptimize_insert_7.q.out | 16 +-
.../spark/cbo_simple_select.q.out | 8 +-
.../spark/groupby_multi_single_reducer3.q.out | 44 +--
.../spark/parquet_vectorization_0.q.out | 22 +-
.../test/results/clientpositive/spark/pcr.q.out | 38 +-
.../clientpositive/spark/ppd_transform.q.out | 4 +-
.../spark/spark_dynamic_partition_pruning.q.out | 4 +-
.../spark_dynamic_partition_pruning_2.q.out | 24 +-
.../spark/spark_explainuser_1.q.out | 2 +-
.../spark/vector_between_in.q.out | 58 +--
.../clientpositive/spark/vectorization_0.q.out | 22 +-
.../clientpositive/spark/vectorized_case.q.out | 12 +-
.../clientpositive/stat_estimate_drill.q.out | 8 +-
.../clientpositive/tez/explainanalyze_5.q.out | 2 +-
.../results/clientpositive/vector_date_1.q.out | 6 +-
.../vector_non_constant_in_expr.q.out | 18 +-
.../clientpositive/vector_struct_in.q.out | 24 +-
.../clientpositive/vectorized_case.q.out | 12 +-
.../clientpositive/vectorized_timestamp.q.out | 6 +-
142 files changed, 2246 insertions(+), 2124 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 093b4a7..535a56b 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2140,7 +2140,7 @@ public class HiveConf extends Configuration {
"is also left in the operator tree at the original place."),
HIVEPOINTLOOKUPOPTIMIZER("hive.optimize.point.lookup", true,
"Whether to transform OR clauses in Filter operators into IN clauses"),
- HIVEPOINTLOOKUPOPTIMIZERMIN("hive.optimize.point.lookup.min", 31,
+ HIVEPOINTLOOKUPOPTIMIZERMIN("hive.optimize.point.lookup.min", 2,
"Minimum number of OR clauses needed to transform into IN clauses"),
HIVECOUNTDISTINCTOPTIMIZER("hive.optimize.countdistinct", true,
"Whether to transform count distinct into two stages"),
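Lowering `hive.optimize.point.lookup.min` from 31 to 2 means even a two-way OR of equalities is now folded into an IN clause. A minimal sketch of the threshold behavior (the function name and string output are illustrative, not Hive's API):

```python
def maybe_rewrite_or_to_in(column, values, min_num_or_clauses=2):
    # Mirrors hive.optimize.point.lookup.min: rewrite only when the OR
    # chain has at least min_num_or_clauses equality disjuncts over the
    # same column; otherwise leave the OR chain as written.
    if len(values) >= min_num_or_clauses:
        return f"({column}) IN ({', '.join(str(v) for v in values)})"
    return " or ".join(f"({column} = {v})" for v in values)
```

Under the old default of 31, a predicate like `key = 100 OR key = 150` was left alone; with the new default of 2 it becomes `(key) IN (100, 150)`, which is what the rewritten `filterExpr` lines in the .q.out diffs show.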
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 97e4059..c50c698 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -3669,6 +3669,10 @@ public class VectorizationContext {
return (Short) o;
} else if (o instanceof Integer) {
return (Integer) o;
+ } else if (o instanceof Short) {
+ return (Short) o;
+ } else if (o instanceof Byte) {
+ return (Byte) o;
} else if (o instanceof Long) {
return (Long) o;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java
index 01ad41c..eff9a31 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java
@@ -201,7 +201,7 @@ public abstract class HivePointLookupOptimizerRule extends RelOptRule {
node = transformIntoInClauseCondition(rexBuilder,
nodeOp.getRowType(), call, minNumORClauses);
if (node == null) {
- return call;
+ return super.visitCall(call);
}
} catch (SemanticException e) {
LOG.error("Exception in HivePointLookupOptimizerRule", e);
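The one-line change above (`return super.visitCall(call)` instead of `return call`) matters because the rule is a recursive visitor: returning the call unchanged stops descent, so OR chains nested below a non-rewritable node were never converted. A self-contained sketch of that visitor shape, assuming a toy expression tree rather than Calcite's RexNode:

```python
from dataclasses import dataclass
from typing import List, Optional, Union

Expr = Union["Call", str, int]

@dataclass
class Call:
    op: str
    children: List[Expr]

def try_rewrite_or_to_in(node: Call) -> Optional[Call]:
    # Rewrite (c = v1) OR (c = v2) ... into (c) IN (v1, v2, ...);
    # return None when the shape does not match.
    if node.op != "OR":
        return None
    cols, vals = set(), []
    for child in node.children:
        if not (isinstance(child, Call) and child.op == "="):
            return None
        col, val = child.children
        cols.add(col)
        vals.append(val)
    if len(cols) != 1:
        return None
    return Call("IN", [cols.pop(), *vals])

def visit(node: Expr) -> Expr:
    if not isinstance(node, Call):
        return node
    rewritten = try_rewrite_or_to_in(node)
    if rewritten is not None:
        return rewritten
    # The fix: on failure, keep visiting the children (super.visitCall)
    # instead of returning the call untouched, so OR chains nested
    # under a non-rewritable node (e.g. an AND) are still converted.
    return Call(node.op, [visit(c) for c in node.children])
```

Here `visit` on `AND(OR(key = 100, key = 150), other)` still rewrites the inner OR, which the pre-fix early return would have skipped.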
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
index f544f58..bc47969 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
@@ -340,13 +340,19 @@ public class RexNodeConverter {
} else if (HiveFloorDate.ALL_FUNCTIONS.contains(calciteOp)) {
// If it is a floor <date> operator, we need to rewrite it
childRexNodeLst = rewriteFloorDateChildren(calciteOp, childRexNodeLst);
- } else if (calciteOp.getKind() == SqlKind.IN && childRexNodeLst.size() == 2 && isAllPrimitive) {
- // if it is a single item in an IN clause, transform A IN (B) to A = B
- // from IN [A,B] => EQUALS [A,B]
- // except complex types
- calciteOp =
- SqlFunctionConverter.getCalciteOperator("=", FunctionRegistry.getFunctionInfo("=")
- .getGenericUDF(), argTypeBldr.build(), retType);
+ } else if (calciteOp.getKind() == SqlKind.IN && isAllPrimitive) {
+ if (childRexNodeLst.size() == 2) {
+ // if it is a single item in an IN clause, transform A IN (B) to A = B
+ // from IN [A,B] => EQUALS [A,B]
+ // except complex types
+ calciteOp = SqlStdOperatorTable.EQUALS;
+ } else if (RexUtil.isReferenceOrAccess(childRexNodeLst.get(0), true)) {
+ // if it is more than a single item in an IN clause,
+ // transform from IN [A,B,C] => OR [EQUALS [A,B], EQUALS [A,C]]
+ // except complex types
+ childRexNodeLst = rewriteInClauseChildren(calciteOp, childRexNodeLst);
+ calciteOp = SqlStdOperatorTable.OR;
+ }
}
expr = cluster.getRexBuilder().makeCall(retType, calciteOp, childRexNodeLst);
} else {
@@ -377,8 +383,9 @@ public class RexNodeConverter {
if (udfClassName.equals("UDFToBoolean") || udfClassName.equals("UDFToByte")
|| udfClassName.equals("UDFToDouble") || udfClassName.equals("UDFToInteger")
|| udfClassName.equals("UDFToLong") || udfClassName.equals("UDFToShort")
- || udfClassName.equals("UDFToFloat"))
+ || udfClassName.equals("UDFToFloat")) {
castExpr = true;
+ }
}
}
}
@@ -527,6 +534,19 @@ public class RexNodeConverter {
return newChildRexNodeLst;
}
+ private List<RexNode> rewriteInClauseChildren(SqlOperator op, List<RexNode> childRexNodeLst)
+ throws SemanticException {
+ assert op.getKind() == SqlKind.IN;
+ RexNode firstPred = childRexNodeLst.get(0);
+ List<RexNode> newChildRexNodeLst = new ArrayList<RexNode>();
+ for (int i = 1; i < childRexNodeLst.size(); i++) {
+ newChildRexNodeLst.add(
+ cluster.getRexBuilder().makeCall(
+ SqlStdOperatorTable.EQUALS, firstPred, childRexNodeLst.get(i)));
+ }
+ return newChildRexNodeLst;
+ }
+
private static boolean checkForStatefulFunctions(List<ExprNodeDesc> list) {
for (ExprNodeDesc node : list) {
if (node instanceof ExprNodeGenericFuncDesc) {
@@ -562,8 +582,9 @@ public class RexNodeConverter {
}
}
- if (noInp > 1)
+ if (noInp > 1) {
throw new RuntimeException("Ambiguous column mapping");
+ }
}
return ctxLookingFor;
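The new `rewriteInClauseChildren` path above expands a multi-item IN over primitive types into a disjunction of equalities: `A IN (B, C)` becomes `OR(A = B, A = C)`. A standalone sketch of that expansion, using plain strings in place of Calcite `RexNode`s (the helper name mirrors the patch; nothing here is a Hive API):

```java
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class InToOrSketch {
    // Mirrors rewriteInClauseChildren: given IN operands [A, B, C, ...],
    // produce [(A = B), (A = C), ...]; the caller then joins them with OR
    // (SqlStdOperatorTable.OR in the patch).
    static List<String> rewriteInClauseChildren(List<String> operands) {
        String firstPred = operands.get(0);
        List<String> out = new ArrayList<>();
        for (int i = 1; i < operands.size(); i++) {
            out.add("(" + firstPred + " = " + operands.get(i) + ")");
        }
        return out;
    }

    public static void main(String[] args) {
        List<String> children = Arrays.asList("u", "1", "2");
        String pred = String.join(" or ", rewriteInClauseChildren(children));
        System.out.println(pred); // (u = 1) or (u = 2)
    }
}
```

This matches the predicate strings asserted later in `TestCounterMapping.testInConversion`, where `u in (1,2)` shows up as `((u = 1) or (u = 2))` once the point-lookup threshold is raised.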
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index 01179c8..7682791 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -451,7 +451,10 @@ public class StatsRulesProcFactory {
return numRows / 2;
}
columns.add(columnChild);
- aspCtx.addAffectedColumn((ExprNodeColumnDesc) columnChild);
+
+ // not adding the column as affected, since that would rescale its ndv based on the other
+ // columns' selectivity as well, which leads to underestimation
+ // aspCtx.addAffectedColumn((ExprNodeColumnDesc) columnChild);
final String columnName = ((ExprNodeColumnDesc) columnChild).getColumn();
// if column name is not contained in needed column list then it
// is a partition column. We do not need to evaluate partition columns
@@ -521,13 +524,24 @@ public class StatsRulesProcFactory {
// 3. Calculate IN selectivity
double factor = 1d;
+ if (multiColumn) {
+ // distinct value array does not help that much here; think (1,1),(1,2),(2,1),(2,2) as values
+ // but that will look like (1,2) as column values...
+ factor *= children.size() - 1;
+ }
for (int i = 0; i < columnStats.size(); i++) {
long dvs = columnStats.get(i) == null ? 0 : columnStats.get(i).getCountDistint();
// (num of distinct vals for col in IN clause / num of distinct vals for col )
- double columnFactor = dvs == 0 ? 0.5d : ((double) values.get(i).size() / dvs);
+ double columnFactor = dvs == 0 ? 0.5d : (1.0d / dvs);
+ if (!multiColumn) {
+ columnFactor *= values.get(0).size();
+ }
// max can be 1, even when ndv is larger in IN clause than in column stats
factor *= columnFactor > 1d ? 1d : columnFactor;
}
+
+ // Clamp at 1 to be sure that we don't get out of range.
+ factor = Double.min(factor, 1.0d);
if (!allColsFilteredByStats) {
factor = Double.max(factor, HiveConf.getFloatVar(aspCtx.getConf(), HiveConf.ConfVars.HIVE_STATS_IN_MIN_RATIO));
}
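The reworked selectivity math above can be read as: for a single-column IN, the factor is `|values| / ndv`; for a multi-column (struct) IN, it is `(#value tuples) * prod(1/ndv_i)`, with each per-column factor capped at 1 and the final factor clamped to `[HIVE_STATS_IN_MIN_RATIO, 1]`. A minimal sketch of that arithmetic, with the Hive operator context stripped away (method and parameter names are illustrative):

```java
public class InSelectivitySketch {
    // Mirrors the IN-clause selectivity in StatsRulesProcFactory after the patch.
    // numValueTuples: number of entries in the IN list; ndvs: per-column distinct
    // value counts from column stats (0 when unknown, treated as factor 0.5).
    static double inFactor(boolean multiColumn, int numValueTuples,
                           long[] ndvs, double minRatio) {
        double factor = 1d;
        if (multiColumn) {
            factor *= numValueTuples;
        }
        for (long dvs : ndvs) {
            double columnFactor = dvs == 0 ? 0.5d : (1.0d / dvs);
            if (!multiColumn) {
                columnFactor *= numValueTuples;
            }
            // max can be 1, even when ndv is larger in the IN clause than in stats
            factor *= columnFactor > 1d ? 1d : columnFactor;
        }
        // clamp at 1, then apply the configured floor
        factor = Double.min(factor, 1.0d);
        return Double.max(factor, minRatio);
    }

    public static void main(String[] args) {
        // single column: u IN (1,2), ndv(u)=4  ->  2/4 = 0.5
        System.out.println(inFactor(false, 2, new long[]{4}, 0.0));
        // multi column: (a,b) IN ((1,1),(2,2)), ndv(a)=4, ndv(b)=8 -> 2 * 1/4 * 1/8 = 0.0625
        System.out.println(inFactor(true, 2, new long[]{4, 8}, 0.0));
    }
}
```

This is why several .q.out files below change row estimates, e.g. `annotate_stats_filter.q.out` moving from 2 to 3 rows for `(state) IN ('OH', 'CA')`.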
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
index fa941a1..1a86294 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
@@ -19,7 +19,6 @@
package org.apache.hadoop.hive.ql.parse;
import java.math.BigDecimal;
-
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
@@ -72,6 +71,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeSubQueryDesc;
import org.apache.hadoop.hive.ql.udf.SettableUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFNvl;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
@@ -88,6 +88,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry;
import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
@@ -357,7 +358,9 @@ public class TypeCheckProcFactory {
// not be desirable for the literals; however, this used to be the default behavior
// for explicit decimal literals (e.g. 1.0BD), so we keep this behavior for now.
HiveDecimal hd = HiveDecimal.create(strVal);
- if (notNull && hd == null) return null;
+ if (notNull && hd == null) {
+ return null;
+ }
int prec = 1;
int scale = 0;
if (hd != null) {
@@ -752,7 +755,7 @@ public class TypeCheckProcFactory {
constantExpr.setFoldedFromCol(colInfo.getInternalName());
return constantExpr;
}
-
+
private static ExprNodeConstantDesc toListConstDesc(ColumnInfo colInfo, ObjectInspector inspector,
ObjectInspector listElementOI) {
PrimitiveObjectInspector poi = (PrimitiveObjectInspector)listElementOI;
@@ -761,12 +764,12 @@ public class TypeCheckProcFactory {
for (Object o : values) {
constant.add(poi.getPrimitiveJavaObject(o));
}
-
+
ExprNodeConstantDesc constantExpr = new ExprNodeConstantDesc(colInfo.getType(), constant);
constantExpr.setFoldedFromCol(colInfo.getInternalName());
return constantExpr;
}
-
+
private static ExprNodeConstantDesc toMapConstDesc(ColumnInfo colInfo, ObjectInspector inspector,
ObjectInspector keyOI, ObjectInspector valueOI) {
PrimitiveObjectInspector keyPoi = (PrimitiveObjectInspector)keyOI;
@@ -776,7 +779,7 @@ public class TypeCheckProcFactory {
for (Map.Entry<?, ?> e : values.entrySet()) {
constant.put(keyPoi.getPrimitiveJavaObject(e.getKey()), valuePoi.getPrimitiveJavaObject(e.getValue()));
}
-
+
ExprNodeConstantDesc constantExpr = new ExprNodeConstantDesc(colInfo.getType(), constant);
constantExpr.setFoldedFromCol(colInfo.getInternalName());
return constantExpr;
@@ -791,7 +794,7 @@ public class TypeCheckProcFactory {
PrimitiveObjectInspector fieldPoi = (PrimitiveObjectInspector) fields.get(i).getFieldObjectInspector();
constant.add(fieldPoi.getPrimitiveJavaObject(value));
}
-
+
ExprNodeConstantDesc constantExpr = new ExprNodeConstantDesc(colInfo.getType(), constant);
constantExpr.setFoldedFromCol(colInfo.getInternalName());
return constantExpr;
@@ -1030,8 +1033,9 @@ public class TypeCheckProcFactory {
desc = new ExprNodeFieldDesc(t, children.get(0), fieldNameString, isList);
} else if (funcText.equals("[")) {
// "[]" : LSQUARE/INDEX Expression
- if (!ctx.getallowIndexExpr())
+ if (!ctx.getallowIndexExpr()) {
throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg(expr));
+ }
assert (children.size() == 2);
@@ -1134,47 +1138,44 @@ public class TypeCheckProcFactory {
&& children.get(1) instanceof ExprNodeColumnDesc)
|| (children.get(0) instanceof ExprNodeColumnDesc
&& children.get(1) instanceof ExprNodeConstantDesc))) {
- int constIdx =
- children.get(0) instanceof ExprNodeConstantDesc ? 0 : 1;
-
- String constType = children.get(constIdx).getTypeString().toLowerCase();
- String columnType = children.get(1 - constIdx).getTypeString().toLowerCase();
- final PrimitiveTypeInfo colTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(columnType);
- // Try to narrow type of constant
- Object constVal = ((ExprNodeConstantDesc) children.get(constIdx)).getValue();
- try {
- if (PrimitiveObjectInspectorUtils.intTypeEntry.equals(colTypeInfo.getPrimitiveTypeEntry()) && (constVal instanceof Number || constVal instanceof String)) {
- children.set(constIdx, new ExprNodeConstantDesc(new Integer(constVal.toString())));
- } else if (PrimitiveObjectInspectorUtils.longTypeEntry.equals(colTypeInfo.getPrimitiveTypeEntry()) && (constVal instanceof Number || constVal instanceof String)) {
- children.set(constIdx, new ExprNodeConstantDesc(new Long(constVal.toString())));
- }else if (PrimitiveObjectInspectorUtils.doubleTypeEntry.equals(colTypeInfo.getPrimitiveTypeEntry()) && (constVal instanceof Number || constVal instanceof String)) {
- children.set(constIdx, new ExprNodeConstantDesc(new Double(constVal.toString())));
- } else if (PrimitiveObjectInspectorUtils.floatTypeEntry.equals(colTypeInfo.getPrimitiveTypeEntry()) && (constVal instanceof Number || constVal instanceof String)) {
- children.set(constIdx, new ExprNodeConstantDesc(new Float(constVal.toString())));
- } else if (PrimitiveObjectInspectorUtils.byteTypeEntry.equals(colTypeInfo.getPrimitiveTypeEntry()) && (constVal instanceof Number || constVal instanceof String)) {
- children.set(constIdx, new ExprNodeConstantDesc(new Byte(constVal.toString())));
- } else if (PrimitiveObjectInspectorUtils.shortTypeEntry.equals(colTypeInfo.getPrimitiveTypeEntry()) && (constVal instanceof Number || constVal instanceof String)) {
- children.set(constIdx, new ExprNodeConstantDesc(new Short(constVal.toString())));
- } else if (PrimitiveObjectInspectorUtils.decimalTypeEntry.equals(colTypeInfo.getPrimitiveTypeEntry()) && (constVal instanceof Number || constVal instanceof String)) {
- children.set(constIdx, NumExprProcessor.createDecimal(constVal.toString(),false));
+
+ int constIdx = children.get(0) instanceof ExprNodeConstantDesc ? 0 : 1;
+
+ ExprNodeDesc constChild = children.get(constIdx);
+ ExprNodeDesc columnChild = children.get(1 - constIdx);
+
+ final PrimitiveTypeInfo colTypeInfo =
+ TypeInfoFactory.getPrimitiveTypeInfo(columnChild.getTypeString().toLowerCase());
+ ExprNodeDesc newChild = interpretNodeAs(colTypeInfo, constChild);
+ if (newChild == null) {
+ // non-interpretable as that type...
+ if (genericUDF instanceof GenericUDFOPEqual) {
+ return new ExprNodeConstantDesc(false);
+ }
+ } else {
+ children.set(constIdx, newChild);
}
- } catch (NumberFormatException nfe) {
- LOG.trace("Failed to narrow type of constant", nfe);
- if ((genericUDF instanceof GenericUDFOPEqual && !NumberUtils.isNumber(constVal.toString()))) {
- return new ExprNodeConstantDesc(false);
+ }
+ if (genericUDF instanceof GenericUDFIn && children.get(0) instanceof ExprNodeColumnDesc) {
+ ExprNodeColumnDesc columnDesc = (ExprNodeColumnDesc) children.get(0);
+ final PrimitiveTypeInfo colTypeInfo =
+ TypeInfoFactory.getPrimitiveTypeInfo(columnDesc.getTypeString().toLowerCase());
+ List<ExprNodeDesc> outputOpList = children.subList(1, children.size());
+ ArrayList<ExprNodeDesc> inOperands = new ArrayList<>(outputOpList);
+ outputOpList.clear();
+
+ for (ExprNodeDesc oldChild : inOperands) {
+ if (oldChild != null && oldChild instanceof ExprNodeConstantDesc) {
+ ExprNodeDesc newChild = interpretNodeAs(colTypeInfo, oldChild);
+ if (newChild == null) {
+ // non-interpretable as target type; skip
+ continue;
+ }
+ outputOpList.add(newChild);
+ } else {
+ outputOpList.add(oldChild);
}
}
-
- // if column type is char and constant type is string, then convert the constant to char
- // type with padded spaces.
- if (constType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME) &&
- colTypeInfo instanceof CharTypeInfo) {
- final Object originalValue = ((ExprNodeConstantDesc) children.get(constIdx)).getValue();
- final String constValue = originalValue.toString();
- final int length = TypeInfoUtils.getCharacterLengthForType(colTypeInfo);
- final HiveChar newValue = new HiveChar(constValue, length);
- children.set(constIdx, new ExprNodeConstantDesc(colTypeInfo, newValue));
- }
}
if (genericUDF instanceof GenericUDFOPOr) {
// flatten OR
@@ -1238,6 +1239,50 @@ public class TypeCheckProcFactory {
return desc;
}
+ private ExprNodeDesc interpretNodeAs(PrimitiveTypeInfo colTypeInfo, ExprNodeDesc constChild) {
+ if (constChild instanceof ExprNodeConstantDesc) {
+ // Try to narrow type of constant
+ Object constVal = ((ExprNodeConstantDesc) constChild).getValue();
+ String constType = constChild.getTypeString().toLowerCase();
+ if (constVal instanceof Number || constVal instanceof String) {
+ try {
+ PrimitiveTypeEntry primitiveTypeEntry = colTypeInfo.getPrimitiveTypeEntry();
+ if (PrimitiveObjectInspectorUtils.intTypeEntry.equals(primitiveTypeEntry)) {
+ return new ExprNodeConstantDesc(new Integer(constVal.toString()));
+ } else if (PrimitiveObjectInspectorUtils.longTypeEntry.equals(primitiveTypeEntry)) {
+ return new ExprNodeConstantDesc(new Long(constVal.toString()));
+ } else if (PrimitiveObjectInspectorUtils.doubleTypeEntry.equals(primitiveTypeEntry)) {
+ return new ExprNodeConstantDesc(new Double(constVal.toString()));
+ } else if (PrimitiveObjectInspectorUtils.floatTypeEntry.equals(primitiveTypeEntry)) {
+ return new ExprNodeConstantDesc(new Float(constVal.toString()));
+ } else if (PrimitiveObjectInspectorUtils.byteTypeEntry.equals(primitiveTypeEntry)) {
+ return new ExprNodeConstantDesc(new Byte(constVal.toString()));
+ } else if (PrimitiveObjectInspectorUtils.shortTypeEntry.equals(primitiveTypeEntry)) {
+ return new ExprNodeConstantDesc(new Short(constVal.toString()));
+ } else if (PrimitiveObjectInspectorUtils.decimalTypeEntry.equals(primitiveTypeEntry)) {
+ return NumExprProcessor.createDecimal(constVal.toString(), false);
+ }
+ } catch (NumberFormatException nfe) {
+ LOG.trace("Failed to narrow type of constant", nfe);
+ if (!NumberUtils.isNumber(constVal.toString())) {
+ return null;
+ }
+ }
+ }
+
+ // if column type is char and constant type is string, then convert the constant to char
+ // type with padded spaces.
+ if (constType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME) && colTypeInfo instanceof CharTypeInfo) {
+ final Object originalValue = ((ExprNodeConstantDesc) constChild).getValue();
+ final String constValue = originalValue.toString();
+ final int length = TypeInfoUtils.getCharacterLengthForType(colTypeInfo);
+ final HiveChar newValue = new HiveChar(constValue, length);
+ return new ExprNodeConstantDesc(colTypeInfo, newValue);
+ }
+ }
+ return constChild;
+ }
+
private boolean canConvertIntoNvl(GenericUDF genericUDF, ArrayList<ExprNodeDesc> children) {
if (genericUDF instanceof GenericUDFWhen && children.size() == 3 &&
children.get(1) instanceof ExprNodeConstantDesc &&
@@ -1351,9 +1396,10 @@ public class TypeCheckProcFactory {
* return null;
*/
if (windowingTokens.contains(expr.getType())) {
- if (!ctx.getallowWindowing())
+ if (!ctx.getallowWindowing()) {
throw new SemanticException(SemanticAnalyzer.generateErrorMessage(expr,
ErrorMsg.INVALID_FUNCTION.getMsg("Windowing is not supported in the context")));
+ }
return null;
}
@@ -1367,10 +1413,11 @@ public class TypeCheckProcFactory {
}
if (expr.getType() == HiveParser.TOK_ALLCOLREF) {
- if (!ctx.getallowAllColRef())
+ if (!ctx.getallowAllColRef()) {
throw new SemanticException(SemanticAnalyzer.generateErrorMessage(expr,
ErrorMsg.INVALID_COLUMN
.getMsg("All column reference is not supported in the context")));
+ }
RowResolver input = ctx.getInputRR();
ExprNodeColumnListDesc columnList = new ExprNodeColumnListDesc();
@@ -1439,10 +1486,11 @@ public class TypeCheckProcFactory {
}
if (expr.getType() == HiveParser.TOK_FUNCTIONSTAR) {
- if (!ctx.getallowFunctionStar())
- throw new SemanticException(SemanticAnalyzer.generateErrorMessage(expr,
- ErrorMsg.INVALID_COLUMN
- .getMsg(".* reference is not supported in the context")));
+ if (!ctx.getallowFunctionStar()) {
+ throw new SemanticException(SemanticAnalyzer.generateErrorMessage(expr,
+ ErrorMsg.INVALID_COLUMN
+ .getMsg(".* reference is not supported in the context")));
+ }
RowResolver input = ctx.getInputRR();
for (ColumnInfo colInfo : input.getColumnInfos()) {
@@ -1509,10 +1557,11 @@ public class TypeCheckProcFactory {
ASTNode expr = (ASTNode) nd;
ASTNode sqNode = (ASTNode) expr.getParent().getChild(1);
- if (!ctx.getallowSubQueryExpr())
+ if (!ctx.getallowSubQueryExpr()) {
throw new CalciteSubquerySemanticException(SemanticAnalyzer.generateErrorMessage(sqNode,
ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg("Currently SubQuery expressions are only allowed as " +
"Where and Having Clause predicates")));
+ }
ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx);
if (desc != null) {
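The extracted `interpretNodeAs` above centralizes constant narrowing: reinterpret the constant as the column's primitive type, and return `null` when that fails, which lets an equality like `intCol = 'abc'` fold to constant false and lets an IN list simply drop the uninterpretable value. A simplified sketch of the idea, handling only a few types and without the Hive type machinery (names and the type-string dispatch are illustrative, not the Hive API):

```java
public class NarrowConstantSketch {
    // Try to reinterpret a constant as the column's type.
    // Returns null when the value cannot be read as that type,
    // mirroring interpretNodeAs returning null in the patch.
    static Object interpretAs(String colType, Object constVal) {
        String s = String.valueOf(constVal);
        try {
            switch (colType) {
                case "int":    return Integer.valueOf(s);
                case "bigint": return Long.valueOf(s);
                case "double": return Double.valueOf(s);
                default:       return constVal; // leave other types untouched
            }
        } catch (NumberFormatException nfe) {
            return null; // non-interpretable as the target type
        }
    }

    public static void main(String[] args) {
        System.out.println(interpretAs("int", "100")); // narrowed to an Integer
        System.out.println(interpretAs("int", "abc")); // null -> caller folds to false / skips
    }
}
```

In the real code the null result is consumed differently per call site: the equality branch returns `new ExprNodeConstantDesc(false)`, while the `GenericUDFIn` branch `continue`s past the value.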
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestCounterMapping.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestCounterMapping.java b/ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestCounterMapping.java
index b57b5dd..b705fd7 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestCounterMapping.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestCounterMapping.java
@@ -32,11 +32,12 @@ import org.apache.hadoop.hive.ql.IDriver;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
+import org.apache.hadoop.hive.ql.optimizer.signature.OpTreeSignature;
import org.apache.hadoop.hive.ql.parse.ParseException;
import org.apache.hadoop.hive.ql.plan.mapper.EmptyStatsSource;
import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper;
-import org.apache.hadoop.hive.ql.plan.mapper.StatsSources;
import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper.EquivGroup;
+import org.apache.hadoop.hive.ql.plan.mapper.StatsSources;
import org.apache.hadoop.hive.ql.reexec.ReExecDriver;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.stats.OperatorStats;
@@ -150,6 +151,52 @@ public class TestCounterMapping {
}
@Test
+ public void testInConversion() throws ParseException {
+ String query =
+ "explain select sum(id_uv) from tu where u in (1,2) group by u";
+
+ HiveConf conf = env_setup.getTestCtx().hiveConf;
+ conf.setIntVar(ConfVars.HIVEPOINTLOOKUPOPTIMIZERMIN, 10);
+ IDriver driver = createDriver();
+
+ PlanMapper pm = getMapperForQuery(driver, query);
+ List<FilterOperator> fos = pm.getAll(FilterOperator.class);
+ OpTreeSignature filterSig = pm.lookup(OpTreeSignature.class, fos.get(0));
+ Object pred = filterSig.getSig().getSigMap().get("getPredicateString");
+
+ assertEquals("((u = 1) or (u = 2)) (type: boolean)", pred);
+
+ }
+
+ @Test
+ public void testBreakupAnd() throws ParseException {
+ String query =
+ "explain select sum(id_uv) from tu where u=1 and (u=2 or u=1) group by u";
+
+ IDriver driver = createDriver();
+ PlanMapper pm = getMapperForQuery(driver, query);
+ List<FilterOperator> fos = pm.getAll(FilterOperator.class);
+ OpTreeSignature filterSig = pm.lookup(OpTreeSignature.class, fos.get(0));
+ Object pred = filterSig.getSig().getSigMap().get("getPredicateString");
+ assertEquals("(u = 1) (type: boolean)", pred);
+ }
+
+ @Test
+ public void testBreakupAnd2() throws ParseException {
+ String query =
+ "explain select sum(id_uv) from tu where u in (1,2,3) and u=2 and u=2 and 2=u group by u";
+
+ IDriver driver = createDriver();
+ PlanMapper pm = getMapperForQuery(driver, query);
+ List<FilterOperator> fos = pm.getAll(FilterOperator.class);
+ OpTreeSignature filterSig = pm.lookup(OpTreeSignature.class, fos.get(0));
+ Object pred = filterSig.getSig().getSigMap().get("getPredicateString");
+ assertEquals("(u = 2) (type: boolean)", pred);
+
+ }
+
+
+ @Test
@Ignore("needs HiveFilter mapping")
public void testMappingJoinLookup() throws ParseException {
IDriver driver = createDriver();
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/queries/clientpositive/pointlookup.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/pointlookup.q b/ql/src/test/queries/clientpositive/pointlookup.q
index 1b65cec..a808a51 100644
--- a/ql/src/test/queries/clientpositive/pointlookup.q
+++ b/ql/src/test/queries/clientpositive/pointlookup.q
@@ -1,4 +1,7 @@
--! qt:dataset:src
+
+set hive.optimize.point.lookup.min=31;
+
explain
SELECT key
FROM src
@@ -123,4 +126,4 @@ or inOutputOpt.key = null;
drop table orOutput;
drop table inOutput;
-drop table inOutputOpt;
\ No newline at end of file
+drop table inOutputOpt;
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/queries/clientpositive/pointlookup2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/pointlookup2.q b/ql/src/test/queries/clientpositive/pointlookup2.q
index fe19381..4f2f897 100644
--- a/ql/src/test/queries/clientpositive/pointlookup2.q
+++ b/ql/src/test/queries/clientpositive/pointlookup2.q
@@ -1,5 +1,7 @@
--! qt:dataset:src
+set hive.optimize.point.lookup.min=31;
set hive.mapred.mode=nonstrict;
+
drop table pcr_t1_n2;
drop table pcr_t2_n0;
drop table pcr_t3;
@@ -130,4 +132,4 @@ order by t1.key, t1.value, t2.ds;
drop table pcr_t1_n2;
drop table pcr_t2_n0;
-drop table pcr_t3;
\ No newline at end of file
+drop table pcr_t3;
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/queries/clientpositive/pointlookup3.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/pointlookup3.q b/ql/src/test/queries/clientpositive/pointlookup3.q
index f98feeb..1e061c3 100644
--- a/ql/src/test/queries/clientpositive/pointlookup3.q
+++ b/ql/src/test/queries/clientpositive/pointlookup3.q
@@ -1,4 +1,5 @@
--! qt:dataset:src
+set hive.optimize.point.lookup.min=31;
set hive.mapred.mode=nonstrict;
drop table pcr_t1_n1;
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/alter_partition_coltype.q.out b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
index 5d033a3..5727f0a 100644
--- a/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
+++ b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
@@ -539,9 +539,6 @@ PREHOOK: query: explain extended select intcol from pt.alterdynamic_part_table w
PREHOOK: type: QUERY
POSTHOOK: query: explain extended select intcol from pt.alterdynamic_part_table where (partcol1='2' and partcol2='1')or (partcol1='1' and partcol2='__HIVE_DEFAULT_PARTITION__')
POSTHOOK: type: QUERY
-OPTIMIZED SQL: SELECT `intcol`
-FROM `pt`.`alterdynamic_part_table`
-WHERE `partcol1` = 2 AND `partcol2` = '1' OR `partcol1` = 1 AND `partcol2` = '__HIVE_DEFAULT_PARTITION__'
STAGE DEPENDENCIES:
Stage-0 is a root stage
@@ -600,7 +597,7 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: alterdynamic_part_table
- filterExpr: (((partcol1 = 2) and (partcol2 = '1')) or ((partcol1 = 1) and (partcol2 = '__HIVE_DEFAULT_PARTITION__'))) (type: boolean)
+ filterExpr: (struct(partcol1,partcol2)) IN (const struct(2,'1'), const struct(1,'__HIVE_DEFAULT_PARTITION__')) (type: boolean)
Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Select Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
index 5439588..dd42cf0 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
@@ -649,18 +649,18 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: loc_orc
- filterExpr: ((state = 'OH') or (state = 'CA')) (type: boolean)
+ filterExpr: (state) IN ('OH', 'CA') (type: boolean)
Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: ((state = 'CA') or (state = 'OH')) (type: boolean)
- Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: (state) IN ('OH', 'CA') (type: boolean)
+ Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/annotate_stats_part.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_part.q.out b/ql/src/test/results/clientpositive/annotate_stats_part.q.out
index bafc6de..1c67a65 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_part.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_part.q.out
@@ -241,7 +241,7 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc_n4
- filterExpr: ((year = '2001') or (year = '__HIVE_DEFAULT_PARTITION__')) (type: boolean)
+ filterExpr: (year) IN ('2001', '__HIVE_DEFAULT_PARTITION__') (type: boolean)
Statistics: Num rows: 8 Data size: 3814 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/auto_join19.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_join19.q.out b/ql/src/test/results/clientpositive/auto_join19.q.out
index 3e07ec0..e04c3bf 100644
--- a/ql/src/test/results/clientpositive/auto_join19.q.out
+++ b/ql/src/test/results/clientpositive/auto_join19.q.out
@@ -53,7 +53,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: src1
- filterExpr: (((ds = '2008-04-08') or (ds = '2008-04-09')) and ((hr = '12') or (hr = '11')) and key is not null) (type: boolean)
+ filterExpr: ((ds) IN ('2008-04-08', '2008-04-09') and (hr) IN ('12', '11') and key is not null) (type: boolean)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out b/ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out
index 2e7d796..43cb5ab 100644
--- a/ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out
+++ b/ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out
@@ -915,22 +915,23 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: cbo_t2
- filterExpr: (c_int) IN (c_int, (2 * c_int)) (type: boolean)
+ filterExpr: ((c_int = c_int) or (c_int = (2 * c_int))) (type: boolean)
Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (c_int) IN (c_int, (2 * c_int)) (type: boolean)
- Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: ((c_int = (2 * c_int)) or (c_int = c_int)) (type: boolean)
+ Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
outputColumnNames: key, value, c_int, c_float, c_boolean, dt
- Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
Stage: Stage-0
Fetch Operator
@@ -954,22 +955,23 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: cbo_t2
- filterExpr: (c_int) IN (c_int, 0) (type: boolean)
+ filterExpr: ((c_int = c_int) or (c_int = 0)) (type: boolean)
Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (c_int) IN (c_int, 0) (type: boolean)
- Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: ((c_int = 0) or (c_int = c_int)) (type: boolean)
+ Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
outputColumnNames: key, value, c_int, c_float, c_boolean, dt
- Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
Stage: Stage-0
Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/cbo_simple_select.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/cbo_simple_select.q.out b/ql/src/test/results/clientpositive/cbo_simple_select.q.out
index 33f0e71..2073c6b 100644
--- a/ql/src/test/results/clientpositive/cbo_simple_select.q.out
+++ b/ql/src/test/results/clientpositive/cbo_simple_select.q.out
@@ -915,22 +915,23 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: cbo_t2
- filterExpr: (c_int) IN (c_int, (2 * c_int)) (type: boolean)
+ filterExpr: ((c_int = c_int) or (c_int = (2 * c_int))) (type: boolean)
Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (c_int) IN (c_int, (2 * c_int)) (type: boolean)
- Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: ((c_int = (2 * c_int)) or (c_int = c_int)) (type: boolean)
+ Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
Stage: Stage-0
Fetch Operator
@@ -954,22 +955,23 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: cbo_t2
- filterExpr: (c_int) IN (c_int, 0) (type: boolean)
+ filterExpr: ((c_int = c_int) or (c_int = 0)) (type: boolean)
Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (c_int) IN (c_int, 0) (type: boolean)
- Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: ((c_int = 0) or (c_int = c_int)) (type: boolean)
+ Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
Stage: Stage-0
Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/druid_intervals.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/druid_intervals.q.out b/ql/src/test/results/clientpositive/druid_intervals.q.out
index a5203c3..715623a 100644
--- a/ql/src/test/results/clientpositive/druid_intervals.q.out
+++ b/ql/src/test/results/clientpositive/druid_intervals.q.out
@@ -375,7 +375,7 @@ STAGE PLANS:
properties:
druid.fieldNames vc,robot
druid.fieldTypes timestamp with local time zone,string
- druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"or","fields":[{"type":"in","dimension":"__time","values":["2010-01-01T08:00:00.000Z","2011-01-01T08:00:00.000Z"],"extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}},{"type":"selector","dimension":"robot","value":"user1"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","robot"],"resultFormat":"compactedList"}
+ druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"or","fields":[{"type":"selector","dimension":"robot","value":"user1"},{"type":"selector","dimension":"__time","value":"2010-01-01T08:00:00.000Z","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}},{"type":"selector","dimension":"__time","value":"2011-01-01T08:00:00.000Z","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","robot"],"resultFormat":"compactedList"}
druid.query.type scan
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out b/ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out
index 97922c2..88b24a7 100644
--- a/ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out
+++ b/ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out
@@ -194,9 +194,6 @@ PREHOOK: query: explain extended select intcol from dynamic_part_table where (pa
PREHOOK: type: QUERY
POSTHOOK: query: explain extended select intcol from dynamic_part_table where (partcol1='1' and partcol2='1')or (partcol1='1' and partcol2='__HIVE_DEFAULT_PARTITION__')
POSTHOOK: type: QUERY
-OPTIMIZED SQL: SELECT `intcol`
-FROM `default`.`dynamic_part_table`
-WHERE `partcol1` = '1' AND (`partcol2` = '1' OR `partcol2` = '__HIVE_DEFAULT_PARTITION__')
STAGE DEPENDENCIES:
Stage-0 is a root stage
@@ -302,7 +299,7 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: dynamic_part_table
- filterExpr: ((partcol1 = '1') and ((partcol2 = '1') or (partcol2 = '__HIVE_DEFAULT_PARTITION__'))) (type: boolean)
+ filterExpr: ((partcol2) IN ('1', '__HIVE_DEFAULT_PARTITION__') and (partcol1 = '1')) (type: boolean)
Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Select Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out b/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out
index b84a2d4..19b506b 100644
--- a/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out
@@ -36,10 +36,10 @@ STAGE PLANS:
value expressions: _col1 (type: string)
TableScan
alias: f
- filterExpr: (((value = '2008-04-08') or (value = '2008-04-09')) and key is not null) (type: boolean)
+ filterExpr: ((value) IN ('2008-04-08', '2008-04-09') and key is not null) (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((value = '2008-04-08') or (value = '2008-04-09')) and key is not null) (type: boolean)
+ predicate: ((value) IN ('2008-04-08', '2008-04-09') and key is not null) (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
@@ -143,11 +143,11 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: m
- filterExpr: ((value <> '') and key is not null) (type: boolean)
+ alias: f
+ filterExpr: key is not null (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((value <> '') and key is not null) (type: boolean)
+ predicate: key is not null (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
@@ -160,11 +160,11 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
TableScan
- alias: f
- filterExpr: (((value) IN ('2008-04-08', '2008-04-10') or (value = '2008-04-09')) and key is not null) (type: boolean)
+ alias: m
+ filterExpr: ((value <> '') and key is not null) (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((value) IN ('2008-04-08', '2008-04-10') or (value = '2008-04-09')) and key is not null) (type: boolean)
+ predicate: ((value <> '') and key is not null) (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
@@ -183,11 +183,11 @@ STAGE PLANS:
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col1, _col2, _col3
+ outputColumnNames: _col0, _col1, _col3
Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((_col3) IN ('2008-04-08', '2008-04-10') and (_col1 = '2008-04-08')) or (_col3 = '2008-04-09')) (type: boolean)
- Statistics: Num rows: 412 Data size: 4376 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((((_col1 = '2008-04-08') or (_col1 = '2008-04-10')) and (_col3 = '2008-04-08')) or (_col1 = '2008-04-09')) (type: boolean)
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -200,11 +200,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
Reduce Output Operator
- key expressions: _col1 (type: string)
+ key expressions: _col3 (type: string)
sort order: +
- Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 412 Data size: 4376 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: string)
+ Map-reduce partition columns: _col3 (type: string)
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
TableScan
alias: g
filterExpr: (value <> '') (type: boolean)
@@ -226,17 +226,17 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 _col1 (type: string)
+ 0 _col3 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col2, _col4
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col0, _col4
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col4 (type: string)
+ expressions: _col0 (type: string), _col4 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -419,10 +419,10 @@ STAGE PLANS:
value expressions: _col1 (type: string)
TableScan
alias: m
- filterExpr: ((value <> '') and ((value = '2008-04-10') or (value = '2008-04-08')) and key is not null) (type: boolean)
+ filterExpr: ((value <> '') and key is not null) (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((value = '2008-04-10') or (value = '2008-04-08')) and (value <> '') and key is not null) (type: boolean)
+ predicate: ((value <> '') and key is not null) (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
@@ -444,15 +444,15 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col3
Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((_col1) IN ('2008-04-08', '2008-04-10') and (_col1) IN ('2008-04-08', '2008-04-09') and (_col3 = '2008-04-10')) or (_col3 = '2008-04-08')) (type: boolean)
- Statistics: Num rows: 344 Data size: 3654 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((((_col1 = '2008-04-08') or (_col1 = '2008-04-10')) and ((_col1 = '2008-04-08') or (_col1 = '2008-04-09')) and (_col3 = '2008-04-10')) or (_col3 = '2008-04-08')) (type: boolean)
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 344 Data size: 3654 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 344 Data size: 3654 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/filter_in_or_dup.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/filter_in_or_dup.q.out b/ql/src/test/results/clientpositive/filter_in_or_dup.q.out
index b50027d..f96298c 100644
--- a/ql/src/test/results/clientpositive/filter_in_or_dup.q.out
+++ b/ql/src/test/results/clientpositive/filter_in_or_dup.q.out
@@ -24,14 +24,14 @@ STAGE PLANS:
Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (key) IN ('1', '2') (type: boolean)
- Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -70,14 +70,14 @@ STAGE PLANS:
Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (key) IN ('1', '2') (type: boolean)
- Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -116,14 +116,14 @@ STAGE PLANS:
Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (key) IN ('1', '2') (type: boolean)
- Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out b/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out
index d06fb60..cb4d65c 100644
--- a/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out
+++ b/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out
@@ -139,10 +139,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: src
- filterExpr: ((key = '238') or (key = '94')) (type: boolean)
+ filterExpr: (key) IN ('238', '94') (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key = '238') or (key = '94')) (type: boolean)
+ predicate: (key) IN ('238', '94') (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out b/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out
index 7ec51ff..53ddde0 100644
--- a/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out
+++ b/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out
@@ -55,32 +55,32 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (((value) IN ('val_100', 'val_200', 'val_300') and (key) IN (100, 150, 200)) or ((value) IN ('val_400', 'val_500') and (key) IN (400, 450))) (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: key (type: string)
sort order: +
Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
value expressions: value (type: string)
Reduce Operator Tree:
Forward
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((KEY._col0) IN (100, 150, 200) and (VALUE._col0) IN ('val_100', 'val_200', 'val_300')) (type: boolean)
- Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
keys: KEY._col0 (type: string)
mode: complete
outputColumnNames: _col0, _col1
- Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), UDFToInteger(_col1) (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -89,7 +89,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: int)
outputColumnNames: key, count
- Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll')
mode: hash
@@ -103,20 +103,20 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Filter Operator
predicate: ((KEY._col0) IN (400, 450) and (VALUE._col0) IN ('val_400', 'val_500')) (type: boolean)
- Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
keys: KEY._col0 (type: string)
mode: complete
outputColumnNames: _col0, _col1
- Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), UDFToInteger(_col1) (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -125,7 +125,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: int)
outputColumnNames: key, count
- Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll')
mode: hash
@@ -565,32 +565,32 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (((value) IN ('val_100', 'val_200', 'val_300') and (key) IN (100, 150, 200)) or ((value) IN ('val_400', 'val_500') and (key) IN (400, 450))) (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: key (type: string)
sort order: +
Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
value expressions: value (type: string)
Reduce Operator Tree:
Forward
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((KEY._col0) IN (100, 150, 200) and (VALUE._col0) IN ('val_100', 'val_200', 'val_300')) (type: boolean)
- Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
keys: KEY._col0 (type: string)
mode: complete
outputColumnNames: _col0, _col1
- Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), UDFToInteger(_col1) (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -599,7 +599,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: int)
outputColumnNames: key, count
- Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll')
mode: hash
@@ -613,20 +613,20 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Filter Operator
predicate: ((KEY._col0) IN (400, 450) and (VALUE._col0) IN ('val_400', 'val_500')) (type: boolean)
- Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
keys: KEY._col0 (type: string)
mode: complete
outputColumnNames: _col0, _col1
- Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), UDFToInteger(_col1) (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -635,7 +635,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: int)
outputColumnNames: key, count
- Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll')
mode: hash
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/implicit_cast_during_insert.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/implicit_cast_during_insert.q.out b/ql/src/test/results/clientpositive/implicit_cast_during_insert.q.out
index 5e974bf..f2f1a50 100644
--- a/ql/src/test/results/clientpositive/implicit_cast_during_insert.q.out
+++ b/ql/src/test/results/clientpositive/implicit_cast_during_insert.q.out
@@ -33,25 +33,25 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (key) IN (0, 1) (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: value (type: string), key (type: string)
outputColumnNames: _col1, _col2
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col2 (type: string)
sort order: +
Map-reduce partition columns: _col2 (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Reduce Operator Tree:
Select Operator
expressions: UDFToInteger(KEY.reducesinkkey0) (type: int), VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -60,20 +60,20 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
outputColumnNames: c1, c2, p1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll')
keys: p1 (type: string)
mode: complete
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/join45.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/join45.q.out b/ql/src/test/results/clientpositive/join45.q.out
index 4365d52..cbabf7f 100644
--- a/ql/src/test/results/clientpositive/join45.q.out
+++ b/ql/src/test/results/clientpositive/join45.q.out
@@ -673,7 +673,7 @@ STAGE PLANS:
Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (struct(_col0,_col2)) IN (const struct(100,100), const struct(101,101), const struct(102,102)) (type: boolean)
- Statistics: Num rows: 3125 Data size: 60200 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9375 Data size: 180600 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 10
Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/join47.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/join47.q.out b/ql/src/test/results/clientpositive/join47.q.out
index c04b94b..6f529d6 100644
--- a/ql/src/test/results/clientpositive/join47.q.out
+++ b/ql/src/test/results/clientpositive/join47.q.out
@@ -665,7 +665,7 @@ STAGE PLANS:
1
outputColumnNames: _col0, _col1, _col2, _col3
residual filter predicates: {(struct(_col0,_col2)) IN (const struct(100,100), const struct(101,101), const struct(102,102))}
- Statistics: Num rows: 3125 Data size: 60200 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9375 Data size: 180600 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 10
Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out
index 98ad365..ec1e540 100644
--- a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out
+++ b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out
@@ -379,9 +379,6 @@ PREHOOK: query: explain extended SELECT key, value FROM fact_daily_n2 WHERE ds='
PREHOOK: type: QUERY
POSTHOOK: query: explain extended SELECT key, value FROM fact_daily_n2 WHERE ds='1' and hr='4' and ( (key='484' and value ='val_484') or (key='238' and value= 'val_238'))
POSTHOOK: type: QUERY
-OPTIMIZED SQL: SELECT `key`, `value`
-FROM `default`.`fact_daily_n2`
-WHERE `ds` = '1' AND `hr` = '4' AND (`key` = '484' AND `value` = 'val_484' OR `key` = '238' AND `value` = 'val_238')
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -392,12 +389,12 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: fact_daily_n2
- filterExpr: ((ds = '1') and (hr = '4') and (((key = '484') and (value = 'val_484')) or ((key = '238') and (value = 'val_238')))) (type: boolean)
+ filterExpr: ((struct(key,value)) IN (const struct('484','val_484'), const struct('238','val_238')) and (ds = '1') and (hr = '4')) (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: (((key = '238') and (value = 'val_238')) or ((key = '484') and (value = 'val_484'))) (type: boolean)
+ predicate: (struct(key,value)) IN (const struct('484','val_484'), const struct('238','val_238')) (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
@@ -431,6 +428,55 @@ STAGE PLANS:
Path -> Partition:
#### A masked pattern was here ####
Partition
+ base file name: HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 1
+ hr 4
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+ bucket_count -1
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.fact_daily_n2
+ numFiles 3
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct fact_daily_n2 { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.fact_daily_n2
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct fact_daily_n2 { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.fact_daily_n2
+ name: default.fact_daily_n2
+#### A masked pattern was here ####
+ Partition
base file name: value=val_238
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -528,6 +574,7 @@ STAGE PLANS:
name: default.fact_daily_n2
name: default.fact_daily_n2
Truncated Path -> Alias:
+ /fact_daily_n2/ds=1/hr=4/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME [fact_daily_n2]
/fact_daily_n2/ds=1/hr=4/key=238/value=val_238 [fact_daily_n2]
/fact_daily_n2/ds=1/hr=4/key=484/value=val_484 [fact_daily_n2]
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/llap/bucketpruning1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/bucketpruning1.q.out b/ql/src/test/results/clientpositive/llap/bucketpruning1.q.out
index cc637db..260ba1c 100644
--- a/ql/src/test/results/clientpositive/llap/bucketpruning1.q.out
+++ b/ql/src/test/results/clientpositive/llap/bucketpruning1.q.out
@@ -788,9 +788,6 @@ PREHOOK: type: QUERY
POSTHOOK: query: explain extended
select * from srcbucket_pruned where (key=1 or key=2) and ds='2008-04-08'
POSTHOOK: type: QUERY
-OPTIMIZED SQL: SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds`
-FROM `default`.`srcbucket_pruned`
-WHERE (`key` = 1 OR `key` = 2) AND `ds` = '2008-04-08'
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -804,12 +801,13 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcbucket_pruned
- filterExpr: (((key = 1) or (key = 2)) and (ds = '2008-04-08')) (type: boolean)
+ filterExpr: ((key) IN (1, 2) and (ds = '2008-04-08')) (type: boolean)
+ buckets included: [4,13,] of 16
Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: PARTIAL
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: (((key = 1) or (key = 2)) and (ds = '2008-04-08')) (type: boolean)
+ predicate: ((ds = '2008-04-08') and (key) IN (1, 2)) (type: boolean)
Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: key (type: int), value (type: string), '2008-04-08' (type: string)
@@ -852,9 +850,6 @@ PREHOOK: type: QUERY
POSTHOOK: query: explain extended
select * from srcbucket_pruned where (key=1 or key=2) and value = 'One' and ds='2008-04-08'
POSTHOOK: type: QUERY
-OPTIMIZED SQL: SELECT `key`, CAST('One' AS STRING) AS `value`, CAST('2008-04-08' AS STRING) AS `ds`
-FROM `default`.`srcbucket_pruned`
-WHERE (`key` = 1 OR `key` = 2) AND `value` = 'One' AND `ds` = '2008-04-08'
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -868,12 +863,13 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcbucket_pruned
- filterExpr: (((key = 1) or (key = 2)) and (value = 'One') and (ds = '2008-04-08')) (type: boolean)
+ filterExpr: ((key) IN (1, 2) and (value = 'One') and (ds = '2008-04-08')) (type: boolean)
+ buckets included: [4,13,] of 16
Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: PARTIAL
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: (((key = 1) or (key = 2)) and (ds = '2008-04-08') and (value = 'One')) (type: boolean)
+ predicate: ((ds = '2008-04-08') and (key) IN (1, 2) and (value = 'One')) (type: boolean)
Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: key (type: int), 'One' (type: string), '2008-04-08' (type: string)
@@ -1293,9 +1289,6 @@ PREHOOK: type: QUERY
POSTHOOK: query: explain extended
select * from srcbucket_pruned where key = 1 and ds='2008-04-08' and (value='One' or value = 'Two')
POSTHOOK: type: QUERY
-OPTIMIZED SQL: SELECT CAST(1 AS INTEGER) AS `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds`
-FROM `default`.`srcbucket_pruned`
-WHERE `key` = 1 AND `ds` = '2008-04-08' AND (`value` = 'One' OR `value` = 'Two')
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -1309,12 +1302,13 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcbucket_pruned
- filterExpr: ((key = 1) and (ds = '2008-04-08') and ((value = 'One') or (value = 'Two'))) (type: boolean)
+ filterExpr: ((value) IN ('One', 'Two') and (key = 1) and (ds = '2008-04-08')) (type: boolean)
+ buckets included: [13,] of 16
Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: PARTIAL
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: (((value = 'One') or (value = 'Two')) and (ds = '2008-04-08') and (key = 1)) (type: boolean)
+ predicate: ((ds = '2008-04-08') and (key = 1) and (value) IN ('One', 'Two')) (type: boolean)
Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: 1 (type: int), value (type: string), '2008-04-08' (type: string)
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out
index 1330a86..82a7717 100644
--- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out
+++ b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out
@@ -85,27 +85,27 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: b
- filterExpr: (((key = 0) or (key = 5)) and key is not null) (type: boolean)
+ filterExpr: (key) IN (0, 5) (type: boolean)
Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean)
- Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: (key) IN (0, 5) (type: boolean)
+ Statistics: Num rows: 3 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 537 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
alias: a
- filterExpr: (((key = 0) or (key = 5)) and key is not null) (type: boolean)
+ filterExpr: (key) IN (0, 5) (type: boolean)
Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean)
- Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: (key) IN (0, 5) (type: boolean)
+ Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 4 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE
Merge Join Operator
condition map:
Inner Join 0 to 1
@@ -113,16 +113,16 @@ STAGE PLANS:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col4
- Statistics: Num rows: 2 Data size: 366 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 549 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), concat(_col1, _col4) (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: llap
Reducer 2
@@ -131,10 +131,10 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -143,7 +143,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
outputColumnNames: key, value, ds
- Statistics: Num rows: 2 Data size: 546 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
keys: ds (type: string)
@@ -299,27 +299,27 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test_table2_n19
- filterExpr: (((key = 0) or (key = 5)) and key is not null) (type: boolean)
+ filterExpr: (key) IN (0, 5) (type: boolean)
Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean)
- Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: (key) IN (0, 5) (type: boolean)
+ Statistics: Num rows: 3 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
alias: test_table1_n20
- filterExpr: (((key = 0) or (key = 5)) and key is not null) (type: boolean)
+ filterExpr: (key) IN (0, 5) (type: boolean)
Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean)
- Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: (key) IN (0, 5) (type: boolean)
+ Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE
Merge Join Operator
condition map:
Inner Join 0 to 1
@@ -327,16 +327,16 @@ STAGE PLANS:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 2 Data size: 366 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 549 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), concat(_col1, _col3) (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: llap
Reducer 2
@@ -345,10 +345,10 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -357,7 +357,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
outputColumnNames: key, value, ds
- Statistics: Num rows: 2 Data size: 546 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
keys: ds (type: string)
@@ -519,27 +519,27 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test_table2_n19
- filterExpr: ((key < 8) and ((key = 0) or (key = 5))) (type: boolean)
+ filterExpr: ((key) IN (0, 5) and (key < 8)) (type: boolean)
Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (((key = 0) or (key = 5)) and (key < 8)) (type: boolean)
- Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: ((key < 8) and (key) IN (0, 5)) (type: boolean)
+ Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
alias: test_table1_n20
- filterExpr: ((key < 8) and ((key = 0) or (key = 5))) (type: boolean)
+ filterExpr: ((key) IN (0, 5) and (key < 8)) (type: boolean)
Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (((key = 0) or (key = 5)) and (key < 8)) (type: boolean)
- Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: ((key < 8) and (key) IN (0, 5)) (type: boolean)
+ Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
Merge Join Operator
condition map:
Inner Join 0 to 1
@@ -547,16 +547,16 @@ STAGE PLANS:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 2 Data size: 366 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 183 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), concat(_col1, _col3) (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: llap
Reducer 2
@@ -565,10 +565,10 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -577,7 +577,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
outputColumnNames: key, value, ds
- Statistics: Num rows: 2 Data size: 546 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 273 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
keys: ds (type: string)
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/llap/cbo_simple_select.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/cbo_simple_select.q.out b/ql/src/test/results/clientpositive/llap/cbo_simple_select.q.out
index a35edb4..e61300b 100644
--- a/ql/src/test/results/clientpositive/llap/cbo_simple_select.q.out
+++ b/ql/src/test/results/clientpositive/llap/cbo_simple_select.q.out
@@ -851,9 +851,9 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: cbo_t2
- filterExpr: (c_int) IN (c_int, (2 * c_int)) (type: boolean)
+ filterExpr: ((c_int = c_int) or (c_int = (2 * c_int))) (type: boolean)
Filter Operator
- predicate: (c_int) IN (c_int, (2 * c_int)) (type: boolean)
+ predicate: ((c_int = (2 * c_int)) or (c_int = c_int)) (type: boolean)
Select Operator
expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
@@ -875,9 +875,9 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: cbo_t2
- filterExpr: (c_int) IN (c_int, 0) (type: boolean)
+ filterExpr: ((c_int = c_int) or (c_int = 0)) (type: boolean)
Filter Operator
- predicate: (c_int) IN (c_int, 0) (type: boolean)
+ predicate: ((c_int = 0) or (c_int = c_int)) (type: boolean)
Select Operator
expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/llap/check_constraint.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/check_constraint.q.out b/ql/src/test/results/clientpositive/llap/check_constraint.q.out
index 123a3e4..ec1ed64 100644
--- a/ql/src/test/results/clientpositive/llap/check_constraint.q.out
+++ b/ql/src/test/results/clientpositive/llap/check_constraint.q.out
@@ -1931,10 +1931,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: acid_uami_n0
- filterExpr: (((de = 103) or (de = 119)) and enforce_constraint((893.14 >= CAST( i AS decimal(5,2))) is not false)) (type: boolean)
+ filterExpr: ((de) IN (103, 119) and enforce_constraint((893.14 >= CAST( i AS decimal(5,2))) is not false)) (type: boolean)
Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((de = 103) or (de = 119)) and enforce_constraint((893.14 >= CAST( i AS decimal(5,2))) is not false)) (type: boolean)
+ predicate: ((de) IN (103, 119) and enforce_constraint((893.14 >= CAST( i AS decimal(5,2))) is not false)) (type: boolean)
Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), i (type: int), vc (type: varchar(128))
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
index 8f06ee5..78eded3 100644
--- a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
@@ -5927,10 +5927,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date_hour_n0
- filterExpr: (((date = '2008-04-08') or (date = '2008-04-09')) and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean)
+ filterExpr: ((date) IN ('2008-04-08', '2008-04-09') and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean)
Statistics: Num rows: 4 Data size: 2944 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((date = '2008-04-08') or (date = '2008-04-09')) and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean)
+ predicate: ((UDFToDouble(hour) = 11.0D) and (date) IN ('2008-04-08', '2008-04-09') and ds is not null and hr is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string), hr (type: string)
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out b/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out
index e03cd34..1cfa613 100644
--- a/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out
+++ b/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out
@@ -3233,10 +3233,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: acid_uami_n1
- filterExpr: (((de = 109.23) or (de = 119.23)) and enforce_constraint(vc is not null)) (type: boolean)
+ filterExpr: ((de) IN (109.23, 119.23) and enforce_constraint(vc is not null)) (type: boolean)
Statistics: Num rows: 1002 Data size: 225450 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (((de = 109.23) or (de = 119.23)) and enforce_constraint(vc is not null)) (type: boolean)
+ predicate: ((de) IN (109.23, 119.23) and enforce_constraint(vc is not null)) (type: boolean)
Statistics: Num rows: 3 Data size: 675 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), i (type: int), vc (type: varchar(128))
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
index 4db83c1..a981916 100644
--- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
@@ -1648,7 +1648,7 @@ Stage-0
Select Operator [SEL_2]
Output:["_col0"]
Filter Operator [FIL_4]
- predicate:((c_int = -6) or (c_int = 6))
+ predicate:(c_int) IN (-6, 6)
TableScan [TS_0]
Output:["key","c_int"]
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/llap/explainuser_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/explainuser_2.q.out b/ql/src/test/results/clientpositive/llap/explainuser_2.q.out
index 47941fa..d821681 100644
--- a/ql/src/test/results/clientpositive/llap/explainuser_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/explainuser_2.q.out
@@ -1015,7 +1015,7 @@ Stage-0
<-Map 2 [BROADCAST_EDGE] vectorized, llap
BROADCAST [RS_209]
PartitionCols:_col1, _col3
- Map Join Operator [MAPJOIN_208] (rows=275 width=10)
+ Map Join Operator [MAPJOIN_208] (rows=550 width=10)
Conds:RS_205._col0=SEL_207._col0(Inner),Output:["_col1","_col2","_col3"]
<-Map 1 [BROADCAST_EDGE] vectorized, llap
BROADCAST [RS_205]
@@ -1026,9 +1026,9 @@ Stage-0
predicate:(k1 is not null and v2 is not null and v3 is not null)
TableScan [TS_0] (rows=170 width=34)
default@cs,cs,Tbl:COMPLETE,Col:NONE,Output:["k1","v2","k3","v3"]
- <-Select Operator [SEL_207] (rows=250 width=10)
+ <-Select Operator [SEL_207] (rows=500 width=10)
Output:["_col0"]
- Filter Operator [FIL_206] (rows=250 width=10)
+ Filter Operator [FIL_206] (rows=500 width=10)
predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null)
TableScan [TS_3] (rows=500 width=10)
default@src,d3,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
@@ -1039,7 +1039,7 @@ Stage-0
<-Map 10 [BROADCAST_EDGE] vectorized, llap
BROADCAST [RS_219]
PartitionCols:_col4, _col2
- Map Join Operator [MAPJOIN_218] (rows=275 width=10)
+ Map Join Operator [MAPJOIN_218] (rows=550 width=10)
Conds:RS_215._col0=SEL_217._col0(Inner),Output:["_col2","_col3","_col4","_col5"]
<-Map 9 [BROADCAST_EDGE] vectorized, llap
BROADCAST [RS_215]
@@ -1050,9 +1050,9 @@ Stage-0
predicate:((v1 = 'srv1') and k1 is not null and k2 is not null and k3 is not null and v2 is not null and v3 is not null)
TableScan [TS_18] (rows=85 width=34)
default@sr,sr,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"]
- <-Select Operator [SEL_217] (rows=250 width=10)
+ <-Select Operator [SEL_217] (rows=500 width=10)
Output:["_col0"]
- Filter Operator [FIL_216] (rows=250 width=10)
+ Filter Operator [FIL_216] (rows=500 width=10)
predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null)
TableScan [TS_21] (rows=500 width=10)
default@src,d2,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/llap/kryo.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/kryo.q.out b/ql/src/test/results/clientpositive/llap/kryo.q.out
index 234bae8..764a914 100644
--- a/ql/src/test/results/clientpositive/llap/kryo.q.out
+++ b/ql/src/test/results/clientpositive/llap/kryo.q.out
@@ -44,10 +44,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: datetime_
- filterExpr: ((id = 1) or (id = 2) or (id = 3) or (id = 4) or (id = 5) or (id = 6)) (type: boolean)
+ filterExpr: (id) IN (1, 2, 3, 4, 5, 6) (type: boolean)
Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((id = 1) or (id = 2) or (id = 3) or (id = 4) or (id = 5) or (id = 6)) (type: boolean)
+ predicate: (id) IN (1, 2, 3, 4, 5, 6) (type: boolean)
Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: if(((id = 1) or (id = 2)), if((id = 1), date_, date_), if((id = 3), CASE WHEN ((date_ is null or to_date(datetime) is null)) THEN (null) WHEN ((CAST( date_ AS DATE) > to_date(datetime))) THEN (date_) ELSE (to_date(datetime)) END, null)) (type: string), id (type: int), CASE WHEN ((id = 6)) THEN (CASE WHEN ((concat(date_, ' 00:00:00') is null or datetime is null)) THEN (null) WHEN ((concat(date_, ' 00:00:00') > datetime)) THEN (concat(date_, ' 00:00:00')) ELSE (datetime) END) WHEN ((id = 5)) THEN (CASE WHEN ((date_ is null or datetime is null)) THEN (null) WHEN ((date_ > datetime)) THEN (date_) ELSE (datetime) END) WHEN ((id = 3)) THEN (concat(date_, ' 00:00:00')) WHEN ((id = 4)) THEN (concat(date_, ' 00:00:00')) WHEN ((id = 1)) THEN (date_) WHEN ((id = 2)) THEN (date_) ELSE (null) END (type: string)
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/llap/llap_decimal64_reader.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/llap_decimal64_reader.q.out b/ql/src/test/results/clientpositive/llap/llap_decimal64_reader.q.out
index 88ddd9c..5b1584e 100644
--- a/ql/src/test/results/clientpositive/llap/llap_decimal64_reader.q.out
+++ b/ql/src/test/results/clientpositive/llap/llap_decimal64_reader.q.out
@@ -126,10 +126,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: orc_llap_n0
- filterExpr: ((cdecimal1 = 3.35) or (cdecimal1 = 4.46)) (type: boolean)
+ filterExpr: (cdecimal1) IN (3.35, 4.46) (type: boolean)
Statistics: Num rows: 24576 Data size: 5505024 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: ((cdecimal1 = 3.35) or (cdecimal1 = 4.46)) (type: boolean)
+ predicate: (cdecimal1) IN (3.35, 4.46) (type: boolean)
Statistics: Num rows: 24576 Data size: 5505024 Basic stats: COMPLETE Column stats: COMPLETE
Top N Key Operator
sort order: ++
@@ -227,10 +227,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: orc_llap_n0
- filterExpr: ((cdecimal1 = 3.35) or (cdecimal1 = 4.46)) (type: boolean)
+ filterExpr: (cdecimal1) IN (3.35, 4.46) (type: boolean)
Statistics: Num rows: 24576 Data size: 5505024 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: ((cdecimal1 = 3.35) or (cdecimal1 = 4.46)) (type: boolean)
+ predicate: (cdecimal1) IN (3.35, 4.46) (type: boolean)
Statistics: Num rows: 24576 Data size: 5505024 Basic stats: COMPLETE Column stats: COMPLETE
Top N Key Operator
sort order: ++
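The hunks above and below all show the same two changes from HIVE-19097: a disjunction of equality predicates such as `(cdecimal1 = 3.35) or (cdecimal1 = 4.46)` is folded into a single `(cdecimal1) IN (3.35, 4.46)` predicate, and the `Statistics:` rows are re-annotated so the IN list is estimated as a whole rather than by compounding per-OR selectivities. A minimal sketch of that idea follows; the function names and the NDV-based heuristic are illustrative assumptions for exposition, not Hive's actual classes or its exact estimator.

```python
# Illustrative sketch only -- NOT Hive source code. It demonstrates two things
# visible in the plan diffs: (1) the OR-of-equals and IN forms are semantically
# identical, and (2) an IN list can be estimated in one step from the column's
# number of distinct values (NDV) instead of combining per-disjunct estimates.

def or_of_equals(value, constants):
    # Predicate in its original form: (col = c1) or (col = c2) or ...
    return any(value == c for c in constants)

def in_list(value, constants):
    # Predicate after the rewrite: (col) IN (c1, c2, ...)
    return value in set(constants)

def estimate_in_rows(num_rows, ndv, num_constants):
    # Assumed heuristic: each IN constant matches roughly numRows / NDV rows;
    # the total is capped at the table's row count.
    return min(num_rows, num_rows * num_constants // max(ndv, 1))
```

For example, with 500 rows and 250 distinct keys, a two-constant IN list would be estimated at about 4 matching rows under this heuristic; the exact numbers in the diffs come from Hive's own stats annotation, which also depends on column-stats availability (`Column stats: NONE` vs `COMPLETE`).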
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_ssb.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_ssb.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_ssb.q.out
index 2ccb6eb..63b6e54 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_ssb.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_ssb.q.out
@@ -1504,10 +1504,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: default.ssb_mv_n0
- filterExpr: ((d_year >= 1992) and (d_year <= 1997) and ((c_city = 'UNITED KI1') or (c_city = 'UNITED KI5')) and ((s_city = 'UNITED KI1') or (s_city = 'UNITED KI5'))) (type: boolean)
+ filterExpr: ((c_city) IN ('UNITED KI1', 'UNITED KI5') and (s_city) IN ('UNITED KI1', 'UNITED KI5') and (d_year >= 1992) and (d_year <= 1997)) (type: boolean)
Statistics: Num rows: 1 Data size: 380 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((c_city = 'UNITED KI1') or (c_city = 'UNITED KI5')) and ((s_city = 'UNITED KI1') or (s_city = 'UNITED KI5')) and (d_year <= 1997) and (d_year >= 1992)) (type: boolean)
+ predicate: ((c_city) IN ('UNITED KI1', 'UNITED KI5') and (d_year <= 1997) and (d_year >= 1992) and (s_city) IN ('UNITED KI1', 'UNITED KI5')) (type: boolean)
Statistics: Num rows: 1 Data size: 380 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(lo_revenue)
@@ -1613,10 +1613,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: default.ssb_mv_n0
- filterExpr: ((d_yearmonth = 'Dec1997') and ((c_city = 'UNITED KI1') or (c_city = 'UNITED KI5')) and ((s_city = 'UNITED KI1') or (s_city = 'UNITED KI5'))) (type: boolean)
+ filterExpr: ((c_city) IN ('UNITED KI1', 'UNITED KI5') and (s_city) IN ('UNITED KI1', 'UNITED KI5') and (d_yearmonth = 'Dec1997')) (type: boolean)
Statistics: Num rows: 1 Data size: 564 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((c_city = 'UNITED KI1') or (c_city = 'UNITED KI5')) and ((s_city = 'UNITED KI1') or (s_city = 'UNITED KI5')) and (d_yearmonth = 'Dec1997')) (type: boolean)
+ predicate: ((c_city) IN ('UNITED KI1', 'UNITED KI5') and (d_yearmonth = 'Dec1997') and (s_city) IN ('UNITED KI1', 'UNITED KI5')) (type: boolean)
Statistics: Num rows: 1 Data size: 564 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: c_city (type: string), d_year (type: int), s_city (type: string), lo_revenue (type: double)
@@ -1730,10 +1730,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: default.ssb_mv_n0
- filterExpr: ((c_region = 'AMERICA') and (s_region = 'AMERICA') and ((p_mfgr = 'MFGR#1') or (p_mfgr = 'MFGR#2'))) (type: boolean)
+ filterExpr: ((p_mfgr) IN ('MFGR#1', 'MFGR#2') and (c_region = 'AMERICA') and (s_region = 'AMERICA')) (type: boolean)
Statistics: Num rows: 1 Data size: 748 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((p_mfgr = 'MFGR#1') or (p_mfgr = 'MFGR#2')) and (c_region = 'AMERICA') and (s_region = 'AMERICA')) (type: boolean)
+ predicate: ((c_region = 'AMERICA') and (p_mfgr) IN ('MFGR#1', 'MFGR#2') and (s_region = 'AMERICA')) (type: boolean)
Statistics: Num rows: 1 Data size: 748 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: c_nation (type: string), d_year (type: int), net_revenue (type: double)
@@ -1845,10 +1845,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: default.ssb_mv_n0
- filterExpr: ((c_region = 'AMERICA') and (s_region = 'AMERICA') and ((d_year = 1997) or (d_year = 1998)) and ((p_mfgr = 'MFGR#1') or (p_mfgr = 'MFGR#2'))) (type: boolean)
+ filterExpr: ((d_year) IN (1997, 1998) and (p_mfgr) IN ('MFGR#1', 'MFGR#2') and (c_region = 'AMERICA') and (s_region = 'AMERICA')) (type: boolean)
Statistics: Num rows: 1 Data size: 932 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((d_year = 1997) or (d_year = 1998)) and ((p_mfgr = 'MFGR#1') or (p_mfgr = 'MFGR#2')) and (c_region = 'AMERICA') and (s_region = 'AMERICA')) (type: boolean)
+ predicate: ((c_region = 'AMERICA') and (d_year) IN (1997, 1998) and (p_mfgr) IN ('MFGR#1', 'MFGR#2') and (s_region = 'AMERICA')) (type: boolean)
Statistics: Num rows: 1 Data size: 932 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_year (type: int), p_category (type: string), s_nation (type: string), net_revenue (type: double)
@@ -1960,10 +1960,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: default.ssb_mv_n0
- filterExpr: ((c_region = 'AMERICA') and (p_category = 'MFGR#14') and (s_nation = 'UNITED STATES') and ((d_year = 1997) or (d_year = 1998))) (type: boolean)
+ filterExpr: ((d_year) IN (1997, 1998) and (c_region = 'AMERICA') and (p_category = 'MFGR#14') and (s_nation = 'UNITED STATES')) (type: boolean)
Statistics: Num rows: 1 Data size: 932 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((d_year = 1997) or (d_year = 1998)) and (c_region = 'AMERICA') and (p_category = 'MFGR#14') and (s_nation = 'UNITED STATES')) (type: boolean)
+ predicate: ((c_region = 'AMERICA') and (d_year) IN (1997, 1998) and (p_category = 'MFGR#14') and (s_nation = 'UNITED STATES')) (type: boolean)
Statistics: Num rows: 1 Data size: 932 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_year (type: int), p_brand1 (type: string), s_city (type: string), net_revenue (type: double)
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_ssb_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_ssb_2.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_ssb_2.q.out
index da74d76..8d14b3e 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_ssb_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_ssb_2.q.out
@@ -1506,10 +1506,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: default.ssb_mv
- filterExpr: ((UDFToInteger(d_year) >= 1992) and (UDFToInteger(d_year) <= 1997) and ((c_city = 'UNITED KI1') or (c_city = 'UNITED KI5')) and ((s_city = 'UNITED KI1') or (s_city = 'UNITED KI5'))) (type: boolean)
+ filterExpr: ((c_city) IN ('UNITED KI1', 'UNITED KI5') and (s_city) IN ('UNITED KI1', 'UNITED KI5') and (UDFToInteger(d_year) >= 1992) and (UDFToInteger(d_year) <= 1997)) (type: boolean)
Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((c_city = 'UNITED KI1') or (c_city = 'UNITED KI5')) and ((s_city = 'UNITED KI1') or (s_city = 'UNITED KI5')) and (UDFToInteger(d_year) <= 1997) and (UDFToInteger(d_year) >= 1992)) (type: boolean)
+ predicate: ((UDFToInteger(d_year) <= 1997) and (UDFToInteger(d_year) >= 1992) and (c_city) IN ('UNITED KI1', 'UNITED KI5') and (s_city) IN ('UNITED KI1', 'UNITED KI5')) (type: boolean)
Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: c_city (type: string), s_city (type: string), UDFToInteger(d_year) (type: int), lo_revenue (type: double)
@@ -1619,10 +1619,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: default.ssb_mv
- filterExpr: ((d_yearmonth = 'Dec1997') and ((c_city = 'UNITED KI1') or (c_city = 'UNITED KI5')) and ((s_city = 'UNITED KI1') or (s_city = 'UNITED KI5'))) (type: boolean)
+ filterExpr: ((c_city) IN ('UNITED KI1', 'UNITED KI5') and (s_city) IN ('UNITED KI1', 'UNITED KI5') and (d_yearmonth = 'Dec1997')) (type: boolean)
Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((c_city = 'UNITED KI1') or (c_city = 'UNITED KI5')) and ((s_city = 'UNITED KI1') or (s_city = 'UNITED KI5')) and (d_yearmonth = 'Dec1997')) (type: boolean)
+ predicate: ((c_city) IN ('UNITED KI1', 'UNITED KI5') and (d_yearmonth = 'Dec1997') and (s_city) IN ('UNITED KI1', 'UNITED KI5')) (type: boolean)
Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: c_city (type: string), s_city (type: string), UDFToInteger(d_year) (type: int), lo_revenue (type: double)
@@ -1736,10 +1736,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: default.ssb_mv
- filterExpr: ((c_region = 'AMERICA') and (s_region = 'AMERICA') and ((p_mfgr = 'MFGR#1') or (p_mfgr = 'MFGR#2'))) (type: boolean)
+ filterExpr: ((p_mfgr) IN ('MFGR#1', 'MFGR#2') and (c_region = 'AMERICA') and (s_region = 'AMERICA')) (type: boolean)
Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((p_mfgr = 'MFGR#1') or (p_mfgr = 'MFGR#2')) and (c_region = 'AMERICA') and (s_region = 'AMERICA')) (type: boolean)
+ predicate: ((c_region = 'AMERICA') and (p_mfgr) IN ('MFGR#1', 'MFGR#2') and (s_region = 'AMERICA')) (type: boolean)
Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: UDFToInteger(d_year) (type: int), c_nation (type: string), net_revenue (type: double)
@@ -1851,10 +1851,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: default.ssb_mv
- filterExpr: ((c_region = 'AMERICA') and (s_region = 'AMERICA') and ((UDFToInteger(d_year) = 1997) or (UDFToInteger(d_year) = 1998)) and ((p_mfgr = 'MFGR#1') or (p_mfgr = 'MFGR#2'))) (type: boolean)
+ filterExpr: ((UDFToInteger(d_year)) IN (1997, 1998) and (p_mfgr) IN ('MFGR#1', 'MFGR#2') and (c_region = 'AMERICA') and (s_region = 'AMERICA')) (type: boolean)
Statistics: Num rows: 1 Data size: 1112 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((UDFToInteger(d_year) = 1997) or (UDFToInteger(d_year) = 1998)) and ((p_mfgr = 'MFGR#1') or (p_mfgr = 'MFGR#2')) and (c_region = 'AMERICA') and (s_region = 'AMERICA')) (type: boolean)
+ predicate: ((UDFToInteger(d_year)) IN (1997, 1998) and (c_region = 'AMERICA') and (p_mfgr) IN ('MFGR#1', 'MFGR#2') and (s_region = 'AMERICA')) (type: boolean)
Statistics: Num rows: 1 Data size: 1112 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: UDFToInteger(d_year) (type: int), s_nation (type: string), p_category (type: string), net_revenue (type: double)
@@ -1966,10 +1966,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: default.ssb_mv
- filterExpr: ((c_region = 'AMERICA') and (p_category = 'MFGR#14') and (s_nation = 'UNITED STATES') and ((UDFToInteger(d_year) = 1997) or (UDFToInteger(d_year) = 1998))) (type: boolean)
+ filterExpr: ((UDFToInteger(d_year)) IN (1997, 1998) and (c_region = 'AMERICA') and (p_category = 'MFGR#14') and (s_nation = 'UNITED STATES')) (type: boolean)
Statistics: Num rows: 1 Data size: 1112 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((UDFToInteger(d_year) = 1997) or (UDFToInteger(d_year) = 1998)) and (c_region = 'AMERICA') and (p_category = 'MFGR#14') and (s_nation = 'UNITED STATES')) (type: boolean)
+ predicate: ((UDFToInteger(d_year)) IN (1997, 1998) and (c_region = 'AMERICA') and (p_category = 'MFGR#14') and (s_nation = 'UNITED STATES')) (type: boolean)
Statistics: Num rows: 1 Data size: 1112 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: UDFToInteger(d_year) (type: int), s_city (type: string), p_brand1 (type: string), net_revenue (type: double)
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out b/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out
index 65eec52..54ccf58 100644
--- a/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out
+++ b/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out
@@ -747,12 +747,12 @@ Stage-1 HIVE COUNTERS:
RECORDS_OUT_0: 1
RECORDS_OUT_INTERMEDIATE_Map_1: 50
RECORDS_OUT_INTERMEDIATE_Reducer_2: 0
- RECORDS_OUT_OPERATOR_FIL_7: 50
- RECORDS_OUT_OPERATOR_FS_9: 1
- RECORDS_OUT_OPERATOR_GBY_8: 1
+ RECORDS_OUT_OPERATOR_FIL_8: 50
+ RECORDS_OUT_OPERATOR_FS_12: 1
+ RECORDS_OUT_OPERATOR_GBY_11: 1
RECORDS_OUT_OPERATOR_MAP_0: 0
- RECORDS_OUT_OPERATOR_RS_3: 50
- RECORDS_OUT_OPERATOR_SEL_2: 50
+ RECORDS_OUT_OPERATOR_RS_10: 50
+ RECORDS_OUT_OPERATOR_SEL_9: 50
RECORDS_OUT_OPERATOR_TS_0: 1100
Stage-1 LLAP IO COUNTERS:
CACHE_HIT_BYTES: 1079
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/llap/vector_between_in.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_between_in.q.out b/ql/src/test/results/clientpositive/llap/vector_between_in.q.out
index 801dda3..3bfd1aa 100644
--- a/ql/src/test/results/clientpositive/llap/vector_between_in.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_between_in.q.out
@@ -144,7 +144,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_date_test
- filterExpr: (not (cdate) IN (DATE'1969-10-26', DATE'1969-07-14', DATE'1970-01-21')) (type: boolean)
+ filterExpr: ((cdate <> DATE'1969-10-26') and (cdate <> DATE'1969-07-14') and (cdate <> DATE'1970-01-21')) (type: boolean)
Statistics: Num rows: 12289 Data size: 653856 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
@@ -152,15 +152,15 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: SelectColumnIsFalse(col 5:boolean)(children: LongColumnInList(col 3, values [-67, -171, 20]) -> 5:boolean)
- predicate: (not (cdate) IN (DATE'1969-10-26', DATE'1969-07-14', DATE'1970-01-21')) (type: boolean)
- Statistics: Num rows: 12284 Data size: 653589 Basic stats: COMPLETE Column stats: NONE
+ predicateExpression: FilterExprAndExpr(children: FilterDateColNotEqualDateScalar(col 3:date, val -67), FilterDateColNotEqualDateScalar(col 3:date, val -171), FilterDateColNotEqualDateScalar(col 3:date, val 20))
+ predicate: ((cdate <> DATE'1969-07-14') and (cdate <> DATE'1969-10-26') and (cdate <> DATE'1970-01-21')) (type: boolean)
+ Statistics: Num rows: 12289 Data size: 653856 Basic stats: COMPLETE Column stats: NONE
Select Operator
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: []
- Statistics: Num rows: 12284 Data size: 653589 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12289 Data size: 653856 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
Group By Vectorization:
@@ -350,7 +350,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_date_test
- filterExpr: (not (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568)) (type: boolean)
+ filterExpr: ((cdecimal1 <> 2365.8945945946) and (cdecimal1 <> 881.0135135135) and (cdecimal1 <> -3367.6517567568)) (type: boolean)
Statistics: Num rows: 12289 Data size: 1307712 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
@@ -358,15 +358,15 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: SelectColumnIsFalse(col 5:boolean)(children: DecimalColumnInList(col 1:decimal(20,10), values [2365.8945945946, 881.0135135135, -3367.6517567568]) -> 5:boolean)
- predicate: (not (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568)) (type: boolean)
- Statistics: Num rows: 12274 Data size: 1306115 Basic stats: COMPLETE Column stats: NONE
+ predicateExpression: FilterExprAndExpr(children: FilterDecimalColNotEqualDecimalScalar(col 1:decimal(20,10), val 2365.8945945946), FilterDecimalColNotEqualDecimalScalar(col 1:decimal(20,10), val 881.0135135135), FilterDecimalColNotEqualDecimalScalar(col 1:decimal(20,10), val -3367.6517567568))
+ predicate: ((cdecimal1 <> -3367.6517567568) and (cdecimal1 <> 2365.8945945946) and (cdecimal1 <> 881.0135135135)) (type: boolean)
+ Statistics: Num rows: 12289 Data size: 1307712 Basic stats: COMPLETE Column stats: NONE
Select Operator
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: []
- Statistics: Num rows: 12274 Data size: 1306115 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12289 Data size: 1307712 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
Group By Vectorization:
@@ -1108,13 +1108,13 @@ STAGE PLANS:
TableScan Vectorization:
native: true
Select Operator
- expressions: (cdate) IN (DATE'1969-10-26', DATE'1969-07-14') (type: boolean)
+ expressions: ((cdate = DATE'1969-10-26') or (cdate = DATE'1969-07-14')) (type: boolean)
outputColumnNames: _col0
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [5]
- selectExpressions: LongColumnInList(col 3, values [-67, -171]) -> 5:boolean
+ projectedOutputColumnNums: [7]
+ selectExpressions: ColOrCol(col 5:boolean, col 6:boolean)(children: DateColEqualDateScalar(col 3:date, date 1969-10-26) -> 5:boolean, DateColEqualDateScalar(col 3:date, date 1969-07-14) -> 6:boolean) -> 7:boolean
Statistics: Num rows: 12289 Data size: 653856 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
@@ -1122,7 +1122,7 @@ STAGE PLANS:
aggregators: VectorUDAFCountStar(*) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
- keyExpressions: col 5:boolean
+ keyExpressions: col 7:boolean
native: false
vectorProcessingMode: HASH
projectedOutputColumnNums: [0]
@@ -1246,13 +1246,13 @@ STAGE PLANS:
TableScan Vectorization:
native: true
Select Operator
- expressions: (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568) (type: boolean)
+ expressions: ((cdecimal1 = 2365.8945945946) or (cdecimal1 = 881.0135135135) or (cdecimal1 = -3367.6517567568)) (type: boolean)
outputColumnNames: _col0
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [5]
- selectExpressions: DecimalColumnInList(col 1:decimal(20,10), values [2365.8945945946, 881.0135135135, -3367.6517567568]) -> 5:boolean
+ projectedOutputColumnNums: [8]
+ selectExpressions: VectorUDFAdaptor(((cdecimal1 = 2365.8945945946) or (cdecimal1 = 881.0135135135) or (cdecimal1 = -3367.6517567568)))(children: DecimalColEqualDecimalScalar(col 1:decimal(20,10), val 2365.8945945946) -> 5:boolean, DecimalColEqualDecimalScalar(col 1:decimal(20,10), val 881.0135135135) -> 6:boolean, DecimalColEqualDecimalScalar(col 1:decimal(20,10), val -3367.6517567568) -> 7:boolean) -> 8:boolean
Statistics: Num rows: 12289 Data size: 1307712 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
@@ -1260,7 +1260,7 @@ STAGE PLANS:
aggregators: VectorUDAFCountStar(*) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
- keyExpressions: col 5:boolean
+ keyExpressions: col 8:boolean
native: false
vectorProcessingMode: HASH
projectedOutputColumnNums: [0]
@@ -1287,7 +1287,7 @@ STAGE PLANS:
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: false
- usesVectorUDFAdaptor: false
+ usesVectorUDFAdaptor: true
vectorized: true
Reducer 2
Execution mode: vectorized, llap
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/llap/vector_string_decimal.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_string_decimal.q.out b/ql/src/test/results/clientpositive/llap/vector_string_decimal.q.out
index 54d9914..9697f55 100644
--- a/ql/src/test/results/clientpositive/llap/vector_string_decimal.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_string_decimal.q.out
@@ -63,7 +63,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: orc_decimal
- filterExpr: (UDFToDouble(id)) IN (1.0E8D, 2.0E8D) (type: boolean)
+ filterExpr: (id) IN (100000000, 200000000) (type: boolean)
Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
@@ -71,8 +71,8 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: FilterDoubleColumnInList(col 3:double, values [1.0E8, 2.0E8])(children: CastDecimalToDouble(col 2:decimal(18,0))(children: ConvertDecimal64ToDecimal(col 0:decimal(18,0)/DECIMAL_64) -> 2:decimal(18,0)) -> 3:double)
- predicate: (UDFToDouble(id)) IN (1.0E8D, 2.0E8D) (type: boolean)
+ predicateExpression: FilterDecimal64ColumnInList(col 0:decimal(18,0)/DECIMAL_64, values [, decimal64Val 100000000, decimalVal 100000000, decimal64Val 200000000, decimalVal 200000000])
+ predicate: (id) IN (100000000, 200000000) (type: boolean)
Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: id (type: decimal(18,0))
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/llap/vector_struct_in.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_struct_in.q.out b/ql/src/test/results/clientpositive/llap/vector_struct_in.q.out
index 3756a2f..2579bb4 100644
--- a/ql/src/test/results/clientpositive/llap/vector_struct_in.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_struct_in.q.out
@@ -851,7 +851,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterStructColumnInList(structExpressions [col 0:bigint, col 1:string, col 2:double], fieldVectorColumnTypes [LONG, BYTES, DOUBLE], structColumnMap [0, 1, 2])
predicate: (struct(my_bigint,my_string,my_double)) IN (const struct(1L,'a',1.5D), const struct(1L,'b',-0.5D), const struct(3L,'b',1.5D), const struct(1L,'d',1.5D), const struct(1L,'c',1.5D), const struct(1L,'b',2.5D), const struct(1L,'b',0.5D), const struct(5L,'b',1.5D), const struct(1L,'a',0.5D), const struct(3L,'b',1.5D)) (type: boolean)
- Statistics: Num rows: 2 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: my_bigint (type: bigint), my_string (type: string), my_double (type: double)
outputColumnNames: _col0, _col1, _col2
@@ -859,13 +859,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2]
- Statistics: Num rows: 2 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 2 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/llap/vector_windowing_multipartitioning.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_windowing_multipartitioning.q.out b/ql/src/test/results/clientpositive/llap/vector_windowing_multipartitioning.q.out
index 725ed34..157d184 100644
--- a/ql/src/test/results/clientpositive/llap/vector_windowing_multipartitioning.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_windowing_multipartitioning.q.out
@@ -10238,7 +10238,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: over10k_n6
- filterExpr: ((s = 'tom allen') or (s = 'bob steinbeck')) (type: boolean)
+ filterExpr: (s) IN ('tom allen', 'bob steinbeck') (type: boolean)
Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
@@ -10247,8 +10247,8 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: FilterExprOrExpr(children: FilterStringGroupColEqualStringScalar(col 7:string, val tom allen), FilterStringGroupColEqualStringScalar(col 7:string, val bob steinbeck))
- predicate: ((s = 'bob steinbeck') or (s = 'tom allen')) (type: boolean)
+ predicateExpression: FilterStringColumnInList(col 7, values tom allen, bob steinbeck)
+ predicate: (s) IN ('tom allen', 'bob steinbeck') (type: boolean)
Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: s (type: string), dec (type: decimal(4,2))
@@ -10516,7 +10516,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: over10k_n6
- filterExpr: ((s = 'tom allen') or (s = 'bob steinbeck')) (type: boolean)
+ filterExpr: (s) IN ('tom allen', 'bob steinbeck') (type: boolean)
Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
@@ -10525,8 +10525,8 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: FilterExprOrExpr(children: FilterStringGroupColEqualStringScalar(col 7:string, val tom allen), FilterStringGroupColEqualStringScalar(col 7:string, val bob steinbeck))
- predicate: ((s = 'bob steinbeck') or (s = 'tom allen')) (type: boolean)
+ predicateExpression: FilterStringColumnInList(col 7, values tom allen, bob steinbeck)
+ predicate: (s) IN ('tom allen', 'bob steinbeck') (type: boolean)
Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: s (type: string)
@@ -10783,7 +10783,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: over10k_n6
- filterExpr: ((s = 'tom allen') or (s = 'bob steinbeck')) (type: boolean)
+ filterExpr: (s) IN ('tom allen', 'bob steinbeck') (type: boolean)
Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
@@ -10792,8 +10792,8 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: FilterExprOrExpr(children: FilterStringGroupColEqualStringScalar(col 7:string, val tom allen), FilterStringGroupColEqualStringScalar(col 7:string, val bob steinbeck))
- predicate: ((s = 'bob steinbeck') or (s = 'tom allen')) (type: boolean)
+ predicateExpression: FilterStringColumnInList(col 7, values tom allen, bob steinbeck)
+ predicate: (s) IN ('tom allen', 'bob steinbeck') (type: boolean)
Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: s (type: string), bo (type: boolean)
@@ -11056,7 +11056,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: over10k_n6
- filterExpr: ((s = 'tom allen') or (s = 'bob steinbeck')) (type: boolean)
+ filterExpr: (s) IN ('tom allen', 'bob steinbeck') (type: boolean)
Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
@@ -11065,8 +11065,8 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: FilterExprOrExpr(children: FilterStringGroupColEqualStringScalar(col 7:string, val tom allen), FilterStringGroupColEqualStringScalar(col 7:string, val bob steinbeck))
- predicate: ((s = 'bob steinbeck') or (s = 'tom allen')) (type: boolean)
+ predicateExpression: FilterStringColumnInList(col 7, values tom allen, bob steinbeck)
+ predicate: (s) IN ('tom allen', 'bob steinbeck') (type: boolean)
Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: i (type: int)
@@ -11336,7 +11336,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: over10k_n6
- filterExpr: ((s = 'tom allen') or (s = 'bob steinbeck')) (type: boolean)
+ filterExpr: (s) IN ('tom allen', 'bob steinbeck') (type: boolean)
Statistics: Num rows: 1 Data size: 304 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
@@ -11345,8 +11345,8 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: FilterExprOrExpr(children: FilterStringGroupColEqualStringScalar(col 7:string, val tom allen), FilterStringGroupColEqualStringScalar(col 7:string, val bob steinbeck))
- predicate: ((s = 'bob steinbeck') or (s = 'tom allen')) (type: boolean)
+ predicateExpression: FilterStringColumnInList(col 7, values tom allen, bob steinbeck)
+ predicate: (s) IN ('tom allen', 'bob steinbeck') (type: boolean)
Statistics: Num rows: 1 Data size: 304 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: s (type: string), dec (type: decimal(4,2))
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/llap/vector_windowing_navfn.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_windowing_navfn.q.out b/ql/src/test/results/clientpositive/llap/vector_windowing_navfn.q.out
index 74ac56d..1871216 100644
--- a/ql/src/test/results/clientpositive/llap/vector_windowing_navfn.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_windowing_navfn.q.out
@@ -1410,7 +1410,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: over10k_n7
- filterExpr: (((s = 'oscar allen') or (s = 'oscar carson')) and (t = 10Y)) (type: boolean)
+ filterExpr: ((s) IN ('oscar allen', 'oscar carson') and (t = 10Y)) (type: boolean)
Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
@@ -1419,8 +1419,8 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: FilterExprAndExpr(children: FilterExprOrExpr(children: FilterStringGroupColEqualStringScalar(col 7:string, val oscar allen), FilterStringGroupColEqualStringScalar(col 7:string, val oscar carson)), FilterLongColEqualLongScalar(col 0:tinyint, val 10))
- predicate: (((s = 'oscar allen') or (s = 'oscar carson')) and (t = 10Y)) (type: boolean)
+ predicateExpression: FilterExprAndExpr(children: FilterStringColumnInList(col 7, values oscar allen, oscar carson), FilterLongColEqualLongScalar(col 0:tinyint, val 10))
+ predicate: ((s) IN ('oscar allen', 'oscar carson') and (t = 10Y)) (type: boolean)
Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: UDFToByte(10) (type: tinyint), s (type: string)
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/llap/vectorized_case.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_case.q.out b/ql/src/test/results/clientpositive/llap/vectorized_case.q.out
index d444ae8..2591c28 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_case.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_case.q.out
@@ -51,7 +51,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesorc
- filterExpr: ((csmallint = 418S) or (csmallint = 12205S) or (csmallint = 10583S)) (type: boolean)
+ filterExpr: (csmallint) IN (418S, 12205S, 10583S) (type: boolean)
Statistics: Num rows: 12288 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
@@ -60,9 +60,9 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1:smallint, val 418), FilterLongColEqualLongScalar(col 1:smallint, val 12205), FilterLongColEqualLongScalar(col 1:smallint, val 10583))
- predicate: ((csmallint = 10583S) or (csmallint = 12205S) or (csmallint = 418S)) (type: boolean)
- Statistics: Num rows: 6 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+ predicateExpression: FilterLongColumnInList(col 1:smallint, values [418, 12205, 10583])
+ predicate: (csmallint) IN (418S, 12205S, 10583S) (type: boolean)
+ Statistics: Num rows: 7 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418S)) THEN ('a') WHEN ((csmallint = 12205S)) THEN ('b') ELSE ('c') END (type: string), CASE WHEN ((csmallint = 418S)) THEN ('a') WHEN ((csmallint = 12205S)) THEN ('b') ELSE ('c') END (type: string)
outputColumnNames: _col0, _col1, _col2
@@ -71,13 +71,13 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [1, 17, 21]
selectExpressions: IfExprColumnCondExpr(col 13:boolean, col 14:stringcol 16:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, ConstantVectorExpression(val a) -> 14:string, IfExprStringScalarStringScalar(col 15:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 15:boolean) -> 16:string) -> 17:string, IfExprColumnCondExpr(col 15:boolean, col 18:stringcol 20:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 15:boolean, ConstantVectorExpression(val a) -> 18:string, IfExprStringScalarStringScalar(col 19:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 19:boolean) -> 20:string) -> 21:string
- Statistics: Num rows: 6 Data size: 2228 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 2600 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 6 Data size: 2228 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 2600 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -201,7 +201,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesorc
- filterExpr: ((csmallint = 418S) or (csmallint = 12205S) or (csmallint = 10583S)) (type: boolean)
+ filterExpr: (csmallint) IN (418S, 12205S, 10583S) (type: boolean)
Statistics: Num rows: 12288 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
@@ -210,9 +210,9 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1:smallint, val 418), FilterLongColEqualLongScalar(col 1:smallint, val 12205), FilterLongColEqualLongScalar(col 1:smallint, val 10583))
- predicate: ((csmallint = 10583S) or (csmallint = 12205S) or (csmallint = 418S)) (type: boolean)
- Statistics: Num rows: 6 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+ predicateExpression: FilterLongColumnInList(col 1:smallint, values [418, 12205, 10583])
+ predicate: (csmallint) IN (418S, 12205S, 10583S) (type: boolean)
+ Statistics: Num rows: 7 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418S)) THEN ('a') WHEN ((csmallint = 12205S)) THEN ('b') ELSE (null) END (type: string), CASE WHEN ((csmallint = 418S)) THEN ('a') WHEN ((csmallint = 12205S)) THEN (null) ELSE ('c') END (type: string)
outputColumnNames: _col0, _col1, _col2
@@ -221,13 +221,13 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [1, 18, 24]
selectExpressions: IfExprColumnCondExpr(col 13:boolean, col 14:stringcol 17:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, ConstantVectorExpression(val a) -> 14:string, IfExprColumnNull(col 15:boolean, col 16:string, null)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 15:boolean, ConstantVectorExpression(val b) -> 16:string) -> 17:string) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 23:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 19:boolean, ConstantVectorExpression(val a) -> 20:string, IfExprNullColumn(col 21:boolean, null, col 22)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 21:boolean, ConstantVectorExpression(val c) -> 22:string) -> 23:string) -> 24:string
- Statistics: Num rows: 6 Data size: 2228 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 2600 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 6 Data size: 2228 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 2600 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query46.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query46.q.out b/ql/src/test/results/clientpositive/perf/spark/query46.q.out
index ccce45c..2b925a3 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query46.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query46.q.out
@@ -84,11 +84,11 @@ STAGE PLANS:
Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((s_city) IN ('Cedar Grove', 'Wildwood', 'Union', 'Salem', 'Highland Park') and s_store_sk is not null) (type: boolean)
- Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: s_store_sk (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
0 _col4 (type: int)
@@ -156,16 +156,16 @@ STAGE PLANS:
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((d_dow) IN (6, 0) and (d_year) IN (1998, 1999, 2000) and d_date_sk is not null) (type: boolean)
- Statistics: Num rows: 18263 Data size: 20436297 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 18263 Data size: 20436297 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 18263 Data size: 20436297 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Map 13
Map Operator Tree:
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query48.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query48.q.out b/ql/src/test/results/clientpositive/perf/spark/query48.q.out
index 60a4767..38ccff2 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query48.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query48.q.out
@@ -138,7 +138,7 @@ STAGE PLANS:
Spark
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 9
Map Operator Tree:
TableScan
alias: store
@@ -153,8 +153,8 @@ STAGE PLANS:
Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
- 0 _col0 (type: int)
- 1 _col3 (type: int)
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
Execution mode: vectorized
Local Work:
Map Reduce Local Work
@@ -162,45 +162,33 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 49), Map 7 (PARTITION-LEVEL SORT, 49)
- Reducer 4 <- Map 8 (PARTITION-LEVEL SORT, 55), Reducer 3 (PARTITION-LEVEL SORT, 55)
- Reducer 5 <- Map 9 (PARTITION-LEVEL SORT, 138), Reducer 4 (PARTITION-LEVEL SORT, 138)
- Reducer 6 <- Reducer 5 (GROUP, 1)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 133), Map 6 (PARTITION-LEVEL SORT, 133)
+ Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 147), Reducer 2 (PARTITION-LEVEL SORT, 147)
+ Reducer 4 <- Map 8 (PARTITION-LEVEL SORT, 319), Reducer 3 (PARTITION-LEVEL SORT, 319)
+ Reducer 5 <- Reducer 4 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
- Map 2
+ Map 1
Map Operator Tree:
TableScan
alias: store_sales
- filterExpr: ((ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_net_profit BETWEEN 0 AND 2000 or ss_net_profit BETWEEN 150 AND 3000 or ss_net_profit BETWEEN 50 AND 25000) and ss_store_sk is not null and ss_cdemo_sk is not null and ss_addr_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ filterExpr: ((ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and ss_store_sk is not null and ss_cdemo_sk is not null and ss_addr_sk is not null and ss_sold_date_sk is not null) (type: boolean)
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((ss_net_profit BETWEEN 0 AND 2000 or ss_net_profit BETWEEN 150 AND 3000 or ss_net_profit BETWEEN 50 AND 25000) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
- Statistics: Num rows: 63999513 Data size: 5646055611 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ss_sold_date_sk (type: int), ss_cdemo_sk (type: int), ss_addr_sk (type: int), ss_store_sk (type: int), ss_quantity (type: int), ss_net_profit (type: decimal(7,2))
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6
- Statistics: Num rows: 63999513 Data size: 5646055611 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col3 (type: int)
- outputColumnNames: _col1, _col2, _col3, _col5, _col7
- input vertices:
- 0 Map 1
- Statistics: Num rows: 70399465 Data size: 6210661306 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: int)
- sort order: +
- Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 70399465 Data size: 6210661306 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: int), _col3 (type: int), _col5 (type: int), _col7 (type: decimal(7,2))
+ Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col6 (type: decimal(7,2))
Execution mode: vectorized
- Local Work:
- Map Reduce Local Work
- Map 7
+ Map 6
Map Operator Tree:
TableScan
alias: date_dim
@@ -219,7 +207,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
- Map 8
+ Map 7
Map Operator Tree:
TableScan
alias: customer_demographics
@@ -238,77 +226,85 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
- Map 9
+ Map 8
Map Operator Tree:
TableScan
alias: customer_address
- filterExpr: ((ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and (ca_country = 'United States') and ca_address_sk is not null) (type: boolean)
+ filterExpr: ((ca_country = 'United States') and ca_address_sk is not null) (type: boolean)
Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null) (type: boolean)
- Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((ca_country = 'United States') and ca_address_sk is not null) (type: boolean)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ca_address_sk (type: int), ca_state (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Execution mode: vectorized
- Reducer 3
+ Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col1 (type: int)
+ 0 _col0 (type: int)
1 _col0 (type: int)
- outputColumnNames: _col2, _col3, _col5, _col7
- Statistics: Num rows: 77439413 Data size: 6831727584 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col1, _col2, _col3, _col4, _col6
+ Statistics: Num rows: 211198404 Data size: 18631984502 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col2 (type: int)
+ key expressions: _col1 (type: int)
sort order: +
- Map-reduce partition columns: _col2 (type: int)
- Statistics: Num rows: 77439413 Data size: 6831727584 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col3 (type: int), _col5 (type: int), _col7 (type: decimal(7,2))
- Reducer 4
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 211198404 Data size: 18631984502 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col6 (type: decimal(7,2))
+ Reducer 3
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col2 (type: int)
+ 0 _col1 (type: int)
1 _col0 (type: int)
- outputColumnNames: _col3, _col5, _col7
- Statistics: Num rows: 85183356 Data size: 7514900505 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col2, _col3, _col4, _col6
+ Statistics: Num rows: 232318249 Data size: 20495183396 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col3 (type: int)
+ key expressions: _col2 (type: int)
sort order: +
- Map-reduce partition columns: _col3 (type: int)
- Statistics: Num rows: 85183356 Data size: 7514900505 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col5 (type: int), _col7 (type: decimal(7,2))
- Reducer 5
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 232318249 Data size: 20495183396 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: int), _col4 (type: int), _col6 (type: decimal(7,2))
+ Reducer 4
+ Local Work:
+ Map Reduce Local Work
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col3 (type: int)
+ 0 _col2 (type: int)
1 _col0 (type: int)
- outputColumnNames: _col5, _col7, _col14
- Statistics: Num rows: 93701693 Data size: 8266390734 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col3, _col4, _col6, _col13
+ Statistics: Num rows: 255550079 Data size: 22544702224 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((_col14) IN ('KY', 'GA', 'NM') and _col7 BETWEEN 0 AND 2000) or ((_col14) IN ('MT', 'OR', 'IN') and _col7 BETWEEN 150 AND 3000) or ((_col14) IN ('WI', 'MO', 'WV') and _col7 BETWEEN 50 AND 25000)) (type: boolean)
- Statistics: Num rows: 15616947 Data size: 1377731627 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col5 (type: int)
- outputColumnNames: _col5
- Statistics: Num rows: 15616947 Data size: 1377731627 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((((_col13 = 'KY') or (_col13 = 'GA') or (_col13 = 'NM')) and _col6 BETWEEN 0 AND 2000) or (((_col13 = 'MT') or (_col13 = 'OR') or (_col13 = 'IN')) and _col6 BETWEEN 150 AND 3000) or (((_col13 = 'WI') or (_col13 = 'MO') or (_col13 = 'WV')) and _col6 BETWEEN 50 AND 25000)) (type: boolean)
+ Statistics: Num rows: 85183359 Data size: 7514900682 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col4
+ input vertices:
+ 1 Map 9
+ Statistics: Num rows: 93701696 Data size: 8266390929 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: sum(_col5)
+ aggregations: sum(_col4)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
@@ -316,7 +312,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint)
- Reducer 6
+ Reducer 5
Execution mode: vectorized
Reduce Operator Tree:
Group By Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query53.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query53.q.out b/ql/src/test/results/clientpositive/perf/spark/query53.q.out
index 2b1cdfe..ec9350e 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query53.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query53.q.out
@@ -86,7 +86,7 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 399), Map 6 (PARTITION-LEVEL SORT, 399)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 403), Map 6 (PARTITION-LEVEL SORT, 403)
Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 438), Reducer 2 (PARTITION-LEVEL SORT, 438)
Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 529)
Reducer 5 <- Reducer 4 (SORT, 1)
@@ -116,20 +116,20 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: item
- filterExpr: (((i_class) IN ('personal', 'portable', 'reference', 'self-help') or (i_class) IN ('accessories', 'classical', 'fragrances', 'pants')) and ((i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9') or (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')) and ((i_category) IN ('Books', 'Children', 'Electronics') or (i_category) IN ('Women', 'Music', 'Men')) and (((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'reference', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and i_item_sk is not null) (type: boolean)
+ filterExpr: ((((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'reference', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and i_item_sk is not null) (type: boolean)
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'reference', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and ((i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9') or (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')) and ((i_category) IN ('Books', 'Children', 'Electronics') or (i_category) IN ('Women', 'Music', 'Men')) and ((i_class) IN ('personal', 'portable', 'reference', 'self-help') or (i_class) IN ('accessories', 'classical', 'fragrances', 'pants')) and i_item_sk is not null) (type: boolean)
- Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'reference', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: i_item_sk (type: int), i_manufact_id (type: int)
outputColumnNames: _col0, _col4
- Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
value expressions: _col4 (type: int)
Execution mode: vectorized
Map 7
@@ -140,16 +140,16 @@ STAGE PLANS:
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((d_month_seq) IN (1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223) and d_date_sk is not null) (type: boolean)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int), d_qoy (type: int)
outputColumnNames: _col0, _col2
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: int)
Execution mode: vectorized
Reducer 2
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query56.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query56.q.out b/ql/src/test/results/clientpositive/perf/spark/query56.q.out
index 4705987..e03574f 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query56.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query56.q.out
@@ -142,17 +142,17 @@ STAGE PLANS:
Edges:
Reducer 10 <- Map 12 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398)
Reducer 11 <- Map 13 (PARTITION-LEVEL SORT, 596), Reducer 10 (PARTITION-LEVEL SORT, 596)
- Reducer 15 <- Map 1 (PARTITION-LEVEL SORT, 7), Reducer 19 (PARTITION-LEVEL SORT, 7)
+ Reducer 15 <- Map 1 (PARTITION-LEVEL SORT, 8), Reducer 19 (PARTITION-LEVEL SORT, 8)
Reducer 16 <- Reducer 15 (PARTITION-LEVEL SORT, 375), Reducer 22 (PARTITION-LEVEL SORT, 375)
Reducer 17 <- Reducer 16 (GROUP, 406)
- Reducer 19 <- Map 18 (GROUP, 3)
+ Reducer 19 <- Map 18 (GROUP, 6)
Reducer 21 <- Map 12 (PARTITION-LEVEL SORT, 305), Map 20 (PARTITION-LEVEL SORT, 305)
Reducer 22 <- Map 13 (PARTITION-LEVEL SORT, 494), Reducer 21 (PARTITION-LEVEL SORT, 494)
- Reducer 26 <- Map 25 (PARTITION-LEVEL SORT, 7), Reducer 30 (PARTITION-LEVEL SORT, 7)
+ Reducer 26 <- Map 25 (PARTITION-LEVEL SORT, 8), Reducer 30 (PARTITION-LEVEL SORT, 8)
Reducer 27 <- Reducer 26 (PARTITION-LEVEL SORT, 191), Reducer 33 (PARTITION-LEVEL SORT, 191)
Reducer 28 <- Reducer 27 (GROUP, 204)
Reducer 3 <- Reducer 11 (PARTITION-LEVEL SORT, 487), Reducer 15 (PARTITION-LEVEL SORT, 487)
- Reducer 30 <- Map 18 (GROUP, 3)
+ Reducer 30 <- Map 18 (GROUP, 6)
Reducer 32 <- Map 31 (PARTITION-LEVEL SORT, 154), Map 34 (PARTITION-LEVEL SORT, 154)
Reducer 33 <- Map 35 (PARTITION-LEVEL SORT, 327), Reducer 32 (PARTITION-LEVEL SORT, 327)
Reducer 4 <- Reducer 3 (GROUP, 529)
@@ -226,21 +226,21 @@ STAGE PLANS:
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((i_color) IN ('orchid', 'chiffon', 'lace') and i_item_id is not null) (type: boolean)
- Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: i_item_id (type: string)
outputColumnNames: i_item_id
- Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Group By Operator
keys: i_item_id (type: string)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Map 20
Map Operator Tree:
@@ -462,12 +462,12 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
Reducer 21
Reduce Operator Tree:
Join Operator
@@ -592,12 +592,12 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
Reducer 32
Reduce Operator Tree:
Join Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query63.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query63.q.out b/ql/src/test/results/clientpositive/perf/spark/query63.q.out
index b506455..0b1614f 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query63.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query63.q.out
@@ -88,7 +88,7 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 399), Map 6 (PARTITION-LEVEL SORT, 399)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 403), Map 6 (PARTITION-LEVEL SORT, 403)
Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 438), Reducer 2 (PARTITION-LEVEL SORT, 438)
Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 529)
Reducer 5 <- Reducer 4 (SORT, 1)
@@ -118,20 +118,20 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: item
- filterExpr: (((i_class) IN ('personal', 'portable', 'refernece', 'self-help') or (i_class) IN ('accessories', 'classical', 'fragrances', 'pants')) and ((i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9') or (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')) and ((i_category) IN ('Books', 'Children', 'Electronics') or (i_category) IN ('Women', 'Music', 'Men')) and (((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'refernece', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and i_item_sk is not null) (type: boolean)
+ filterExpr: ((((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'refernece', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and i_item_sk is not null) (type: boolean)
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'refernece', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and ((i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9') or (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')) and ((i_category) IN ('Books', 'Children', 'Electronics') or (i_category) IN ('Women', 'Music', 'Men')) and ((i_class) IN ('personal', 'portable', 'refernece', 'self-help') or (i_class) IN ('accessories', 'classical', 'fragrances', 'pants')) and i_item_sk is not null) (type: boolean)
- Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'refernece', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and i_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: i_item_sk (type: int), i_manager_id (type: int)
outputColumnNames: _col0, _col4
- Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
value expressions: _col4 (type: int)
Execution mode: vectorized
Map 7
@@ -142,16 +142,16 @@ STAGE PLANS:
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((d_month_seq) IN (1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223) and d_date_sk is not null) (type: boolean)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int), d_moy (type: int)
outputColumnNames: _col0, _col2
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: int)
Execution mode: vectorized
Reducer 2
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query68.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query68.q.out b/ql/src/test/results/clientpositive/perf/spark/query68.q.out
index faf5d99..f2e6763 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query68.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query68.q.out
@@ -90,7 +90,7 @@ STAGE PLANS:
Spark
#### A masked pattern was here ####
Vertices:
- Map 10
+ Map 11
Map Operator Tree:
TableScan
alias: store
@@ -98,11 +98,11 @@ STAGE PLANS:
Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((s_city) IN ('Cedar Grove', 'Wildwood') and s_store_sk is not null) (type: boolean)
- Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: s_store_sk (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
0 _col4 (type: int)
@@ -110,7 +110,7 @@ STAGE PLANS:
Execution mode: vectorized
Local Work:
Map Reduce Local Work
- Map 11
+ Map 12
Map Operator Tree:
TableScan
alias: household_demographics
@@ -130,35 +130,16 @@ STAGE PLANS:
Execution mode: vectorized
Local Work:
Map Reduce Local Work
- Map 9
- Map Operator Tree:
- TableScan
- alias: date_dim
- filterExpr: ((d_year) IN (1998, 1999, 2000) and d_dom BETWEEN 1 AND 2 and d_date_sk is not null) (type: boolean)
- Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((d_year) IN (1998, 1999, 2000) and d_date_sk is not null and d_dom BETWEEN 1 AND 2) (type: boolean)
- Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: d_date_sk (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- Execution mode: vectorized
- Local Work:
- Map Reduce Local Work
Stage: Stage-1
Spark
Edges:
Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 855), Map 5 (PARTITION-LEVEL SORT, 855)
- Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 882), Reducer 8 (PARTITION-LEVEL SORT, 882)
+ Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 882), Reducer 9 (PARTITION-LEVEL SORT, 882)
Reducer 4 <- Reducer 3 (SORT, 1)
- Reducer 7 <- Map 12 (PARTITION-LEVEL SORT, 846), Map 6 (PARTITION-LEVEL SORT, 846)
- Reducer 8 <- Reducer 7 (GROUP, 582)
+ Reducer 7 <- Map 10 (PARTITION-LEVEL SORT, 398), Map 6 (PARTITION-LEVEL SORT, 398)
+ Reducer 8 <- Map 13 (PARTITION-LEVEL SORT, 846), Reducer 7 (PARTITION-LEVEL SORT, 846)
+ Reducer 9 <- Reducer 8 (GROUP, 582)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -181,7 +162,26 @@ STAGE PLANS:
Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string)
Execution mode: vectorized
- Map 12
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ filterExpr: ((d_year) IN (1998, 1999, 2000) and d_dom BETWEEN 1 AND 2 and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year) IN (1998, 1999, 2000) and d_date_sk is not null and d_dom BETWEEN 1 AND 2) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Map 13
Map Operator Tree:
TableScan
alias: customer_address
@@ -234,45 +234,13 @@ STAGE PLANS:
expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_hdemo_sk (type: int), ss_addr_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_ext_sales_price (type: decimal(7,2)), ss_ext_list_price (type: decimal(7,2)), ss_ext_tax (type: decimal(7,2))
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- input vertices:
- 1 Map 9
- Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col4 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col1, _col2, _col3, _col5, _col6, _col7, _col8
- input vertices:
- 1 Map 10
- Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col2 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col1, _col3, _col5, _col6, _col7, _col8
- input vertices:
- 1 Map 11
- Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col3 (type: int)
- sort order: +
- Map-reduce partition columns: _col3 (type: int)
- Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2))
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2))
Execution mode: vectorized
- Local Work:
- Map Reduce Local Work
Reducer 2
Reduce Operator Tree:
Join Operator
@@ -330,6 +298,44 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Reducer 7
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col4 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col5, _col6, _col7, _col8
+ input vertices:
+ 1 Map 11
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3, _col5, _col6, _col7, _col8
+ input vertices:
+ 1 Map 12
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col3 (type: int)
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2))
+ Reducer 8
Reduce Operator Tree:
Join Operator
condition map:
@@ -351,7 +357,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: int)
Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE
value expressions: _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2))
- Reducer 8
+ Reducer 9
Execution mode: vectorized
Reduce Operator Tree:
Group By Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query69.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query69.q.out b/ql/src/test/results/clientpositive/perf/spark/query69.q.out
index 83b55df..e17832c 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query69.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query69.q.out
@@ -178,7 +178,7 @@ STAGE PLANS:
Edges:
Reducer 13 <- Map 12 (GROUP, 169)
Reducer 16 <- Map 15 (GROUP, 336)
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 697), Map 8 (PARTITION-LEVEL SORT, 697)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 855), Map 8 (PARTITION-LEVEL SORT, 855)
Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 597), Reducer 2 (PARTITION-LEVEL SORT, 597)
Reducer 4 <- Map 10 (PARTITION-LEVEL SORT, 1009), Reducer 13 (PARTITION-LEVEL SORT, 1009), Reducer 3 (PARTITION-LEVEL SORT, 1009)
Reducer 5 <- Reducer 16 (PARTITION-LEVEL SORT, 648), Reducer 4 (PARTITION-LEVEL SORT, 648)
@@ -326,16 +326,16 @@ STAGE PLANS:
Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((ca_state) IN ('CO', 'IL', 'MN') and ca_address_sk is not null) (type: boolean)
- Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ca_address_sk (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Map 9
Map Operator Tree:
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query71.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query71.q.out b/ql/src/test/results/clientpositive/perf/spark/query71.q.out
index bf9c06d..eded78c 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query71.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query71.q.out
@@ -193,10 +193,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: time_dim
- filterExpr: (((t_meal_time = 'breakfast') or (t_meal_time = 'dinner')) and t_time_sk is not null) (type: boolean)
+ filterExpr: ((t_meal_time) IN ('breakfast', 'dinner') and t_time_sk is not null) (type: boolean)
Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((t_meal_time = 'breakfast') or (t_meal_time = 'dinner')) and t_time_sk is not null) (type: boolean)
+ predicate: ((t_meal_time) IN ('breakfast', 'dinner') and t_time_sk is not null) (type: boolean)
Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: t_time_sk (type: int), t_hour (type: int), t_minute (type: int)
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query73.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query73.q.out b/ql/src/test/results/clientpositive/perf/spark/query73.q.out
index 20ec874..ade1df0 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query73.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query73.q.out
@@ -62,34 +62,14 @@ STAGE PLANS:
Spark
#### A masked pattern was here ####
Vertices:
- Map 6
- Map Operator Tree:
- TableScan
- alias: date_dim
- filterExpr: ((d_year) IN (2000, 2001, 2002) and d_dom BETWEEN 1 AND 2 and d_date_sk is not null) (type: boolean)
- Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((d_year) IN (2000, 2001, 2002) and d_date_sk is not null and d_dom BETWEEN 1 AND 2) (type: boolean)
- Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: d_date_sk (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- Execution mode: vectorized
- Local Work:
- Map Reduce Local Work
- Map 7
+ Map 8
Map Operator Tree:
TableScan
alias: household_demographics
- filterExpr: (((hd_buy_potential = '>10000') or (hd_buy_potential = 'unknown')) and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.0D)) ELSE (null) END and hd_demo_sk is not null) (type: boolean)
+ filterExpr: ((hd_buy_potential) IN ('>10000', 'unknown') and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.0D)) ELSE (null) END and hd_demo_sk is not null) (type: boolean)
Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((hd_buy_potential = '>10000') or (hd_buy_potential = 'unknown')) and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.0D)) ELSE (null) END and hd_demo_sk is not null) (type: boolean)
+ predicate: ((hd_buy_potential) IN ('>10000', 'unknown') and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.0D)) ELSE (null) END and hd_demo_sk is not null) (type: boolean)
Statistics: Num rows: 1200 Data size: 128400 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: hd_demo_sk (type: int)
@@ -101,7 +81,7 @@ STAGE PLANS:
1 _col0 (type: int)
Local Work:
Map Reduce Local Work
- Map 8
+ Map 9
Map Operator Tree:
TableScan
alias: store
@@ -109,11 +89,11 @@ STAGE PLANS:
Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((s_county) IN ('Mobile County', 'Maverick County', 'Huron County', 'Kittitas County') and s_store_sk is not null) (type: boolean)
- Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: s_store_sk (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
0 _col3 (type: int)
@@ -125,9 +105,10 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 567), Reducer 5 (PARTITION-LEVEL SORT, 567)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 567), Reducer 6 (PARTITION-LEVEL SORT, 567)
Reducer 3 <- Reducer 2 (SORT, 1)
- Reducer 5 <- Map 4 (GROUP, 529)
+ Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398)
+ Reducer 6 <- Reducer 5 (GROUP, 529)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -163,51 +144,32 @@ STAGE PLANS:
expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_hdemo_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col1, _col2, _col3, _col4
- input vertices:
- 1 Map 6
- Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col2 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col1, _col3, _col4
- input vertices:
- 1 Map 7
- Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col3 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col1, _col4
- input vertices:
- 1 Map 8
- Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- keys: _col1 (type: int), _col4 (type: int)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: int)
- sort order: ++
- Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: bigint)
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int)
+ Execution mode: vectorized
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ filterExpr: ((d_year) IN (2000, 2001, 2002) and d_dom BETWEEN 1 AND 2 and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year) IN (2000, 2001, 2002) and d_date_sk is not null and d_dom BETWEEN 1 AND 2) (type: boolean)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
- Local Work:
- Map Reduce Local Work
Reducer 2
Reduce Operator Tree:
Join Operator
@@ -242,6 +204,50 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Reducer 5
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4
+ Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3, _col4
+ input vertices:
+ 1 Map 8
+ Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col3 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col4
+ input vertices:
+ 1 Map 9
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col1 (type: int), _col4 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: bigint)
+ Reducer 6
Execution mode: vectorized
Reduce Operator Tree:
Group By Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query74.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query74.q.out b/ql/src/test/results/clientpositive/perf/spark/query74.q.out
index 3678906..e73a19f 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query74.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query74.q.out
@@ -170,16 +170,16 @@ STAGE PLANS:
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((d_year = 2001) and (d_year) IN (2001, 2002) and d_date_sk is not null) (type: boolean)
- Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int), 2001 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int)
Execution mode: vectorized
Map 14
@@ -230,16 +230,16 @@ STAGE PLANS:
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((d_year = 2001) and (d_year) IN (2001, 2002) and d_date_sk is not null) (type: boolean)
- Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int), 2001 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int)
Execution mode: vectorized
Map 20
@@ -290,16 +290,16 @@ STAGE PLANS:
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((d_year = 2002) and (d_year) IN (2001, 2002) and d_date_sk is not null) (type: boolean)
- Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int), 2002 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int)
Execution mode: vectorized
Map 26
@@ -330,16 +330,16 @@ STAGE PLANS:
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((d_year = 2002) and (d_year) IN (2001, 2002) and d_date_sk is not null) (type: boolean)
- Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int), 2002 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int)
Execution mode: vectorized
Map 8
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query79.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query79.q.out b/ql/src/test/results/clientpositive/perf/spark/query79.q.out
index 9355239..a83090f 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query79.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query79.q.out
@@ -150,16 +150,16 @@ STAGE PLANS:
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((d_dow = 1) and (d_year) IN (1998, 1999, 2000) and d_date_sk is not null) (type: boolean)
- Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Reducer 2
Reduce Operator Tree:
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query82.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query82.q.out b/ql/src/test/results/clientpositive/perf/spark/query82.q.out
index bc627f1..daadc88 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query82.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query82.q.out
@@ -64,7 +64,7 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 5 (PARTITION-LEVEL SORT, 398), Map 6 (PARTITION-LEVEL SORT, 398)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 399), Map 5 (PARTITION-LEVEL SORT, 399), Map 6 (PARTITION-LEVEL SORT, 399)
Reducer 3 <- Reducer 2 (GROUP, 874)
Reducer 4 <- Reducer 3 (SORT, 1)
#### A masked pattern was here ####
@@ -96,16 +96,16 @@ STAGE PLANS:
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((i_manufact_id) IN (437, 129, 727, 663) and i_current_price BETWEEN 30 AND 60 and i_item_sk is not null) (type: boolean)
- Statistics: Num rows: 25666 Data size: 36863512 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 51333 Data size: 73728460 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: i_item_sk (type: int), i_item_id (type: string), i_item_desc (type: string), i_current_price (type: decimal(7,2))
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 25666 Data size: 36863512 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 51333 Data size: 73728460 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 25666 Data size: 36863512 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 51333 Data size: 73728460 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: decimal(7,2))
Execution mode: vectorized
Map 6
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query83.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query83.q.out b/ql/src/test/results/clientpositive/perf/spark/query83.q.out
index 6fad2ca..1199d29 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query83.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query83.q.out
@@ -208,21 +208,21 @@ STAGE PLANS:
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((d_date) IN ('1998-01-02', '1998-10-15', '1998-11-10') and d_week_seq is not null) (type: boolean)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_week_seq (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Group By Operator
keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Map 14
Map Operator Tree:
@@ -312,21 +312,21 @@ STAGE PLANS:
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((d_date) IN ('1998-01-02', '1998-10-15', '1998-11-10') and d_week_seq is not null) (type: boolean)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_week_seq (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Group By Operator
keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Map 25
Map Operator Tree:
@@ -416,21 +416,21 @@ STAGE PLANS:
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((d_date) IN ('1998-01-02', '1998-10-15', '1998-11-10') and d_week_seq is not null) (type: boolean)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_week_seq (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Group By Operator
keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Map 7
Map Operator Tree:
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/tez/query46.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query46.q.out b/ql/src/test/results/clientpositive/perf/tez/query46.q.out
index 708a852..66f51c3 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query46.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query46.q.out
@@ -165,9 +165,9 @@ Stage-0
<-Map 16 [SIMPLE_EDGE] vectorized
PARTITION_ONLY_SHUFFLE [RS_166]
PartitionCols:_col0
- Select Operator [SEL_165] (rows=852 width=1910)
+ Select Operator [SEL_165] (rows=1704 width=1910)
Output:["_col0"]
- Filter Operator [FIL_164] (rows=852 width=1910)
+ Filter Operator [FIL_164] (rows=1704 width=1910)
predicate:((s_city) IN ('Cedar Grove', 'Wildwood', 'Union', 'Salem', 'Highland Park') and s_store_sk is not null)
TableScan [TS_12] (rows=1704 width=1910)
default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_city"]
@@ -179,9 +179,9 @@ Stage-0
<-Map 14 [SIMPLE_EDGE] vectorized
PARTITION_ONLY_SHUFFLE [RS_158]
PartitionCols:_col0
- Select Operator [SEL_157] (rows=18263 width=1119)
+ Select Operator [SEL_157] (rows=73049 width=1119)
Output:["_col0"]
- Filter Operator [FIL_156] (rows=18263 width=1119)
+ Filter Operator [FIL_156] (rows=73049 width=1119)
predicate:((d_dow) IN (6, 0) and (d_year) IN (1998, 1999, 2000) and d_date_sk is not null)
TableScan [TS_9] (rows=73049 width=1119)
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_dow"]
@@ -202,7 +202,7 @@ Stage-0
PARTITION_ONLY_SHUFFLE [RS_161]
Group By Operator [GBY_160] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_159] (rows=18263 width=1119)
+ Select Operator [SEL_159] (rows=73049 width=1119)
Output:["_col0"]
Please refer to the previous Select Operator [SEL_157]
<-Reducer 17 [BROADCAST_EDGE] vectorized
@@ -213,7 +213,7 @@ Stage-0
PARTITION_ONLY_SHUFFLE [RS_169]
Group By Operator [GBY_168] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_167] (rows=852 width=1910)
+ Select Operator [SEL_167] (rows=1704 width=1910)
Output:["_col0"]
Please refer to the previous Select Operator [SEL_165]
<-Reducer 19 [BROADCAST_EDGE] vectorized
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/tez/query48.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query48.q.out b/ql/src/test/results/clientpositive/perf/tez/query48.q.out
index 1cf8d5c..7b0ce1c 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query48.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query48.q.out
@@ -131,16 +131,16 @@ POSTHOOK: type: QUERY
Plan optimized by CBO.
Vertex dependency in root stage
-Map 8 <- Reducer 10 (BROADCAST_EDGE), Reducer 12 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE)
+Map 1 <- Reducer 10 (BROADCAST_EDGE), Reducer 12 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE)
Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE)
Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE)
Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE)
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
Reducer 5 <- Map 13 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE)
-Reducer 7 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE)
Stage-0
Fetch Operator
@@ -153,115 +153,113 @@ Stage-0
<-Reducer 5 [CUSTOM_SIMPLE_EDGE]
PARTITION_ONLY_SHUFFLE [RS_30]
Group By Operator [GBY_29] (rows=1 width=8)
- Output:["_col0"],aggregations:["sum(_col5)"]
- Select Operator [SEL_28] (rows=15616947 width=88)
- Output:["_col5"]
- Filter Operator [FIL_27] (rows=15616947 width=88)
- predicate:(((_col14) IN ('KY', 'GA', 'NM') and _col7 BETWEEN 0 AND 2000) or ((_col14) IN ('MT', 'OR', 'IN') and _col7 BETWEEN 150 AND 3000) or ((_col14) IN ('WI', 'MO', 'WV') and _col7 BETWEEN 50 AND 25000))
- Merge Join Operator [MERGEJOIN_96] (rows=93701693 width=88)
- Conds:RS_24._col3=RS_123._col0(Inner),Output:["_col5","_col7","_col14"]
- <-Map 13 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_123]
- PartitionCols:_col0
- Select Operator [SEL_122] (rows=10000000 width=1014)
- Output:["_col0","_col1"]
- Filter Operator [FIL_121] (rows=10000000 width=1014)
- predicate:((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null)
- TableScan [TS_12] (rows=40000000 width=1014)
- default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"]
- <-Reducer 4 [SIMPLE_EDGE]
- SHUFFLE [RS_24]
- PartitionCols:_col3
- Merge Join Operator [MERGEJOIN_95] (rows=85183356 width=88)
- Conds:RS_21._col2=RS_115._col0(Inner),Output:["_col3","_col5","_col7"]
- <-Map 11 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_115]
- PartitionCols:_col0
- Select Operator [SEL_114] (rows=465450 width=385)
- Output:["_col0"]
- Filter Operator [FIL_113] (rows=465450 width=385)
- predicate:((cd_education_status = '4 yr Degree') and (cd_marital_status = 'M') and cd_demo_sk is not null)
- TableScan [TS_9] (rows=1861800 width=385)
- default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"]
- <-Reducer 3 [SIMPLE_EDGE]
- SHUFFLE [RS_21]
- PartitionCols:_col2
- Merge Join Operator [MERGEJOIN_94] (rows=77439413 width=88)
- Conds:RS_18._col1=RS_107._col0(Inner),Output:["_col2","_col3","_col5","_col7"]
- <-Map 9 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_107]
- PartitionCols:_col0
- Select Operator [SEL_106] (rows=36524 width=1119)
- Output:["_col0"]
- Filter Operator [FIL_105] (rows=36524 width=1119)
- predicate:((d_year = 1998) and d_date_sk is not null)
- TableScan [TS_6] (rows=73049 width=1119)
- default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"]
- <-Reducer 2 [SIMPLE_EDGE]
- SHUFFLE [RS_18]
- PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_93] (rows=70399465 width=88)
- Conds:RS_99._col0=RS_131._col3(Inner),Output:["_col1","_col2","_col3","_col5","_col7"]
- <-Map 1 [SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_99]
- PartitionCols:_col0
- Select Operator [SEL_98] (rows=1704 width=1910)
- Output:["_col0"]
- Filter Operator [FIL_97] (rows=1704 width=1910)
- predicate:s_store_sk is not null
- TableScan [TS_0] (rows=1704 width=1910)
- default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk"]
- <-Map 8 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_131]
- PartitionCols:_col3
- Select Operator [SEL_130] (rows=63999513 width=88)
- Output:["_col0","_col1","_col2","_col3","_col4","_col6"]
- Filter Operator [FIL_129] (rows=63999513 width=88)
- predicate:((ss_addr_sk BETWEEN DynamicValue(RS_25_customer_address_ca_address_sk_min) AND DynamicValue(RS_25_customer_address_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_25_customer_address_ca_address_sk_bloom_filter))) and (ss_cdemo_sk BETWEEN DynamicValue(RS_22_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_22_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_22_customer_demographics_cd_demo_sk_bloom_filter))) and (ss_net_profit BETWEEN 0 AND 2000 or ss_net_profit BETWEEN 150 AND 3000 or ss_net_profit BETWEEN 50 AND 25000) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_sold_date_sk BETWEEN DynamicValue(RS_19_date_dim_d_date_sk_min) AND DynamicValue(RS_19_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_19_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_15_store_s_store_sk_min) AND DynamicValue(RS_15_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_15_store_s_store_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null)
- TableScan [TS_3] (rows=575995635 width=88)
- default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_net_profit"]
- <-Reducer 10 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_112]
- Group By Operator [GBY_111] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
- <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized
- SHUFFLE [RS_110]
- Group By Operator [GBY_109] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_108] (rows=36524 width=1119)
- Output:["_col0"]
- Please refer to the previous Select Operator [SEL_106]
- <-Reducer 12 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_120]
- Group By Operator [GBY_119] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
- <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized
- SHUFFLE [RS_118]
- Group By Operator [GBY_117] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_116] (rows=465450 width=385)
- Output:["_col0"]
- Please refer to the previous Select Operator [SEL_114]
- <-Reducer 14 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_128]
- Group By Operator [GBY_127] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=10000000)"]
- <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized
- SHUFFLE [RS_126]
- Group By Operator [GBY_125] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=10000000)"]
- Select Operator [SEL_124] (rows=10000000 width=1014)
- Output:["_col0"]
- Please refer to the previous Select Operator [SEL_122]
- <-Reducer 7 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_104]
- Group By Operator [GBY_103] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
- <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_102]
- Group By Operator [GBY_101] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_100] (rows=1704 width=1910)
- Output:["_col0"]
- Please refer to the previous Select Operator [SEL_98]
+ Output:["_col0"],aggregations:["sum(_col4)"]
+ Merge Join Operator [MERGEJOIN_96] (rows=93701696 width=88)
+ Conds:RS_25._col3=RS_123._col0(Inner),Output:["_col4"]
+ <-Map 13 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_123]
+ PartitionCols:_col0
+ Select Operator [SEL_122] (rows=1704 width=1910)
+ Output:["_col0"]
+ Filter Operator [FIL_121] (rows=1704 width=1910)
+ predicate:s_store_sk is not null
+ TableScan [TS_12] (rows=1704 width=1910)
+ default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk"]
+ <-Reducer 4 [SIMPLE_EDGE]
+ SHUFFLE [RS_25]
+ PartitionCols:_col3
+ Filter Operator [FIL_24] (rows=85183359 width=88)
+ predicate:((((_col13 = 'KY') or (_col13 = 'GA') or (_col13 = 'NM')) and _col6 BETWEEN 0 AND 2000) or (((_col13 = 'MT') or (_col13 = 'OR') or (_col13 = 'IN')) and _col6 BETWEEN 150 AND 3000) or (((_col13 = 'WI') or (_col13 = 'MO') or (_col13 = 'WV')) and _col6 BETWEEN 50 AND 25000))
+ Merge Join Operator [MERGEJOIN_95] (rows=255550079 width=88)
+ Conds:RS_21._col2=RS_115._col0(Inner),Output:["_col3","_col4","_col6","_col13"]
+ <-Map 11 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_115]
+ PartitionCols:_col0
+ Select Operator [SEL_114] (rows=20000000 width=1014)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_113] (rows=20000000 width=1014)
+ predicate:((ca_country = 'United States') and ca_address_sk is not null)
+ TableScan [TS_9] (rows=40000000 width=1014)
+ default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"]
+ <-Reducer 3 [SIMPLE_EDGE]
+ SHUFFLE [RS_21]
+ PartitionCols:_col2
+ Merge Join Operator [MERGEJOIN_94] (rows=232318249 width=88)
+ Conds:RS_18._col1=RS_107._col0(Inner),Output:["_col2","_col3","_col4","_col6"]
+ <-Map 9 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_107]
+ PartitionCols:_col0
+ Select Operator [SEL_106] (rows=465450 width=385)
+ Output:["_col0"]
+ Filter Operator [FIL_105] (rows=465450 width=385)
+ predicate:((cd_education_status = '4 yr Degree') and (cd_marital_status = 'M') and cd_demo_sk is not null)
+ TableScan [TS_6] (rows=1861800 width=385)
+ default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"]
+ <-Reducer 2 [SIMPLE_EDGE]
+ SHUFFLE [RS_18]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_93] (rows=211198404 width=88)
+ Conds:RS_131._col0=RS_99._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6"]
+ <-Map 7 [SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_99]
+ PartitionCols:_col0
+ Select Operator [SEL_98] (rows=36524 width=1119)
+ Output:["_col0"]
+ Filter Operator [FIL_97] (rows=36524 width=1119)
+ predicate:((d_year = 1998) and d_date_sk is not null)
+ TableScan [TS_3] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"]
+ <-Map 1 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_131]
+ PartitionCols:_col0
+ Select Operator [SEL_130] (rows=191998545 width=88)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col6"]
+ Filter Operator [FIL_129] (rows=191998545 width=88)
+ predicate:((ss_addr_sk BETWEEN DynamicValue(RS_22_customer_address_ca_address_sk_min) AND DynamicValue(RS_22_customer_address_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_22_customer_address_ca_address_sk_bloom_filter))) and (ss_cdemo_sk BETWEEN DynamicValue(RS_19_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_19_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_19_customer_demographics_cd_demo_sk_bloom_filter))) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_26_store_s_store_sk_min) AND DynamicValue(RS_26_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_26_store_s_store_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null)
+ TableScan [TS_0] (rows=575995635 width=88)
+ default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_net_profit"]
+ <-Reducer 10 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_112]
+ Group By Operator [GBY_111] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
+ <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_110]
+ Group By Operator [GBY_109] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
+ Select Operator [SEL_108] (rows=465450 width=385)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_106]
+ <-Reducer 12 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_120]
+ Group By Operator [GBY_119] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=20000000)"]
+ <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_118]
+ Group By Operator [GBY_117] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=20000000)"]
+ Select Operator [SEL_116] (rows=20000000 width=1014)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_114]
+ <-Reducer 14 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_128]
+ Group By Operator [GBY_127] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
+ <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_126]
+ Group By Operator [GBY_125] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
+ Select Operator [SEL_124] (rows=1704 width=1910)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_122]
+ <-Reducer 8 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_104]
+ Group By Operator [GBY_103] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
+ <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_102]
+ Group By Operator [GBY_101] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
+ Select Operator [SEL_100] (rows=36524 width=1119)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_98]
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/tez/query53.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query53.q.out b/ql/src/test/results/clientpositive/perf/tez/query53.q.out
index 3567534..267e28c 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query53.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query53.q.out
@@ -113,9 +113,9 @@ Stage-0
<-Map 9 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_95]
PartitionCols:_col0
- Select Operator [SEL_94] (rows=36525 width=1119)
+ Select Operator [SEL_94] (rows=73049 width=1119)
Output:["_col0","_col2"]
- Filter Operator [FIL_93] (rows=36525 width=1119)
+ Filter Operator [FIL_93] (rows=73049 width=1119)
predicate:((d_month_seq) IN (1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223) and d_date_sk is not null)
TableScan [TS_6] (rows=73049 width=1119)
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq","d_qoy"]
@@ -127,10 +127,10 @@ Stage-0
<-Map 7 [SIMPLE_EDGE] vectorized
PARTITION_ONLY_SHUFFLE [RS_87]
PartitionCols:_col0
- Select Operator [SEL_86] (rows=115500 width=1436)
+ Select Operator [SEL_86] (rows=462000 width=1436)
Output:["_col0","_col4"]
- Filter Operator [FIL_85] (rows=115500 width=1436)
- predicate:((((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'reference', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and ((i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9') or (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')) and ((i_category) IN ('Books', 'Children', 'Electronics') or (i_category) IN ('Women', 'Music', 'Men')) and ((i_class) IN ('personal', 'portable', 'reference', 'self-help') or (i_class) IN ('accessories', 'classical', 'fragrances', 'pants')) and i_item_sk is not null)
+ Filter Operator [FIL_85] (rows=462000 width=1436)
+ predicate:((((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'reference', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and i_item_sk is not null)
TableScan [TS_3] (rows=462000 width=1436)
default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand","i_class","i_category","i_manufact_id"]
<-Map 1 [SIMPLE_EDGE] vectorized
@@ -150,7 +150,7 @@ Stage-0
SHUFFLE [RS_98]
Group By Operator [GBY_97] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_96] (rows=36525 width=1119)
+ Select Operator [SEL_96] (rows=73049 width=1119)
Output:["_col0"]
Please refer to the previous Select Operator [SEL_94]
<-Reducer 12 [BROADCAST_EDGE] vectorized
@@ -172,7 +172,7 @@ Stage-0
PARTITION_ONLY_SHUFFLE [RS_90]
Group By Operator [GBY_89] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_88] (rows=115500 width=1436)
+ Select Operator [SEL_88] (rows=462000 width=1436)
Output:["_col0"]
Please refer to the previous Select Operator [SEL_86]
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/tez/query56.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query56.q.out b/ql/src/test/results/clientpositive/perf/tez/query56.q.out
index 0d8ac48..40f39b5 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query56.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query56.q.out
@@ -210,16 +210,16 @@ Stage-0
<-Reducer 16 [ONE_TO_ONE_EDGE] vectorized
FORWARD [RS_325]
PartitionCols:_col0
- Group By Operator [GBY_324] (rows=115500 width=1436)
+ Group By Operator [GBY_324] (rows=231000 width=1436)
Output:["_col0"],keys:KEY._col0
<-Map 15 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_323]
PartitionCols:_col0
- Group By Operator [GBY_322] (rows=231000 width=1436)
+ Group By Operator [GBY_322] (rows=462000 width=1436)
Output:["_col0"],keys:i_item_id
- Select Operator [SEL_321] (rows=231000 width=1436)
+ Select Operator [SEL_321] (rows=462000 width=1436)
Output:["i_item_id"]
- Filter Operator [FIL_320] (rows=231000 width=1436)
+ Filter Operator [FIL_320] (rows=462000 width=1436)
predicate:((i_color) IN ('orchid', 'chiffon', 'lace') and i_item_id is not null)
TableScan [TS_3] (rows=462000 width=1436)
default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_id","i_color"]
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/tez/query63.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query63.q.out b/ql/src/test/results/clientpositive/perf/tez/query63.q.out
index a5b7b5a..c9502f0 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query63.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query63.q.out
@@ -115,9 +115,9 @@ Stage-0
<-Map 9 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_95]
PartitionCols:_col0
- Select Operator [SEL_94] (rows=36525 width=1119)
+ Select Operator [SEL_94] (rows=73049 width=1119)
Output:["_col0","_col2"]
- Filter Operator [FIL_93] (rows=36525 width=1119)
+ Filter Operator [FIL_93] (rows=73049 width=1119)
predicate:((d_month_seq) IN (1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223) and d_date_sk is not null)
TableScan [TS_6] (rows=73049 width=1119)
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq","d_moy"]
@@ -129,10 +129,10 @@ Stage-0
<-Map 7 [SIMPLE_EDGE] vectorized
PARTITION_ONLY_SHUFFLE [RS_87]
PartitionCols:_col0
- Select Operator [SEL_86] (rows=115500 width=1436)
+ Select Operator [SEL_86] (rows=462000 width=1436)
Output:["_col0","_col4"]
- Filter Operator [FIL_85] (rows=115500 width=1436)
- predicate:((((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'refernece', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and ((i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9') or (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')) and ((i_category) IN ('Books', 'Children', 'Electronics') or (i_category) IN ('Women', 'Music', 'Men')) and ((i_class) IN ('personal', 'portable', 'refernece', 'self-help') or (i_class) IN ('accessories', 'classical', 'fragrances', 'pants')) and i_item_sk is not null)
+ Filter Operator [FIL_85] (rows=462000 width=1436)
+ predicate:((((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'refernece', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and i_item_sk is not null)
TableScan [TS_3] (rows=462000 width=1436)
default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand","i_class","i_category","i_manager_id"]
<-Map 1 [SIMPLE_EDGE] vectorized
@@ -152,7 +152,7 @@ Stage-0
SHUFFLE [RS_98]
Group By Operator [GBY_97] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_96] (rows=36525 width=1119)
+ Select Operator [SEL_96] (rows=73049 width=1119)
Output:["_col0"]
Please refer to the previous Select Operator [SEL_94]
<-Reducer 12 [BROADCAST_EDGE] vectorized
@@ -174,7 +174,7 @@ Stage-0
PARTITION_ONLY_SHUFFLE [RS_90]
Group By Operator [GBY_89] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_88] (rows=115500 width=1436)
+ Select Operator [SEL_88] (rows=462000 width=1436)
Output:["_col0"]
Please refer to the previous Select Operator [SEL_86]
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/tez/query64.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query64.q.out b/ql/src/test/results/clientpositive/perf/tez/query64.q.out
index 6d3edd3..e3eea58 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query64.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query64.q.out
@@ -519,11 +519,11 @@ Stage-0
PARTITION_ONLY_SHUFFLE [RS_1144]
Group By Operator [GBY_1142] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_1140] (rows=2851 width=1436)
+ Select Operator [SEL_1140] (rows=5703 width=1436)
Output:["_col0"]
- Select Operator [SEL_1136] (rows=2851 width=1436)
+ Select Operator [SEL_1136] (rows=5703 width=1436)
Output:["_col0","_col3"]
- Filter Operator [FIL_1135] (rows=2851 width=1436)
+ Filter Operator [FIL_1135] (rows=5703 width=1436)
predicate:((i_color) IN ('maroon', 'burnished', 'dim', 'steel', 'navajo', 'chocolate') and i_current_price BETWEEN 35 AND 45 and i_current_price BETWEEN 36 AND 50 and i_item_sk is not null)
TableScan [TS_34] (rows=462000 width=1436)
default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_current_price","i_color","i_product_name"]
@@ -624,7 +624,7 @@ Stage-0
PARTITION_ONLY_SHUFFLE [RS_1143]
Group By Operator [GBY_1141] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_1138] (rows=2851 width=1436)
+ Select Operator [SEL_1138] (rows=5703 width=1436)
Output:["_col0"]
Please refer to the previous Select Operator [SEL_1136]
<-Reducer 45 [BROADCAST_EDGE] vectorized
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/tez/query68.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query68.q.out b/ql/src/test/results/clientpositive/perf/tez/query68.q.out
index 24b2502..fd1e04b 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query68.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query68.q.out
@@ -179,9 +179,9 @@ Stage-0
<-Map 16 [SIMPLE_EDGE] vectorized
PARTITION_ONLY_SHUFFLE [RS_166]
PartitionCols:_col0
- Select Operator [SEL_165] (rows=852 width=1910)
+ Select Operator [SEL_165] (rows=1704 width=1910)
Output:["_col0"]
- Filter Operator [FIL_164] (rows=852 width=1910)
+ Filter Operator [FIL_164] (rows=1704 width=1910)
predicate:((s_city) IN ('Cedar Grove', 'Wildwood') and s_store_sk is not null)
TableScan [TS_12] (rows=1704 width=1910)
default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_city"]
@@ -193,9 +193,9 @@ Stage-0
<-Map 14 [SIMPLE_EDGE] vectorized
PARTITION_ONLY_SHUFFLE [RS_158]
PartitionCols:_col0
- Select Operator [SEL_157] (rows=4058 width=1119)
+ Select Operator [SEL_157] (rows=8116 width=1119)
Output:["_col0"]
- Filter Operator [FIL_156] (rows=4058 width=1119)
+ Filter Operator [FIL_156] (rows=8116 width=1119)
predicate:((d_year) IN (1998, 1999, 2000) and d_date_sk is not null and d_dom BETWEEN 1 AND 2)
TableScan [TS_9] (rows=73049 width=1119)
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_dom"]
@@ -216,7 +216,7 @@ Stage-0
PARTITION_ONLY_SHUFFLE [RS_161]
Group By Operator [GBY_160] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_159] (rows=4058 width=1119)
+ Select Operator [SEL_159] (rows=8116 width=1119)
Output:["_col0"]
Please refer to the previous Select Operator [SEL_157]
<-Reducer 17 [BROADCAST_EDGE] vectorized
@@ -227,7 +227,7 @@ Stage-0
PARTITION_ONLY_SHUFFLE [RS_169]
Group By Operator [GBY_168] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_167] (rows=852 width=1910)
+ Select Operator [SEL_167] (rows=1704 width=1910)
Output:["_col0"]
Please refer to the previous Select Operator [SEL_165]
<-Reducer 19 [BROADCAST_EDGE] vectorized
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/tez/query69.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query69.q.out b/ql/src/test/results/clientpositive/perf/tez/query69.q.out
index 738508a..4aad667 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query69.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query69.q.out
@@ -224,9 +224,9 @@ Stage-0
<-Map 9 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_187]
PartitionCols:_col0
- Select Operator [SEL_186] (rows=20000000 width=1014)
+ Select Operator [SEL_186] (rows=40000000 width=1014)
Output:["_col0"]
- Filter Operator [FIL_185] (rows=20000000 width=1014)
+ Filter Operator [FIL_185] (rows=40000000 width=1014)
predicate:((ca_state) IN ('CO', 'IL', 'MN') and ca_address_sk is not null)
TableScan [TS_3] (rows=40000000 width=1014)
default@customer_address,ca,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"]
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/tez/query71.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query71.q.out b/ql/src/test/results/clientpositive/perf/tez/query71.q.out
index 4521aab..ca62fa3 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query71.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query71.q.out
@@ -120,7 +120,7 @@ Stage-0
Select Operator [SEL_172] (rows=86400 width=471)
Output:["_col0","_col1","_col2"]
Filter Operator [FIL_171] (rows=86400 width=471)
- predicate:(((t_meal_time = 'breakfast') or (t_meal_time = 'dinner')) and t_time_sk is not null)
+ predicate:((t_meal_time) IN ('breakfast', 'dinner') and t_time_sk is not null)
TableScan [TS_35] (rows=86400 width=471)
default@time_dim,time_dim,Tbl:COMPLETE,Col:NONE,Output:["t_time_sk","t_hour","t_minute","t_meal_time"]
<-Reducer 4 [SIMPLE_EDGE]
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/tez/query73.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query73.q.out b/ql/src/test/results/clientpositive/perf/tez/query73.q.out
index cfa5213..153b8b4 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query73.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query73.q.out
@@ -109,9 +109,9 @@ Stage-0
<-Map 14 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_119]
PartitionCols:_col0
- Select Operator [SEL_118] (rows=852 width=1910)
+ Select Operator [SEL_118] (rows=1704 width=1910)
Output:["_col0"]
- Filter Operator [FIL_117] (rows=852 width=1910)
+ Filter Operator [FIL_117] (rows=1704 width=1910)
predicate:((s_county) IN ('Mobile County', 'Maverick County', 'Huron County', 'Kittitas County') and s_store_sk is not null)
TableScan [TS_12] (rows=1704 width=1910)
default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_county"]
@@ -126,7 +126,7 @@ Stage-0
Select Operator [SEL_11] (rows=1200 width=107)
Output:["_col0"]
Filter Operator [FIL_55] (rows=1200 width=107)
- predicate:(((hd_buy_potential = '>10000') or (hd_buy_potential = 'unknown')) and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.0D)) ELSE (null) END and hd_demo_sk is not null)
+ predicate:((hd_buy_potential) IN ('>10000', 'unknown') and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.0D)) ELSE (null) END and hd_demo_sk is not null)
TableScan [TS_9] (rows=7200 width=107)
default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_buy_potential","hd_dep_count","hd_vehicle_count"]
<-Reducer 6 [SIMPLE_EDGE]
@@ -137,9 +137,9 @@ Stage-0
<-Map 10 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_109]
PartitionCols:_col0
- Select Operator [SEL_108] (rows=4058 width=1119)
+ Select Operator [SEL_108] (rows=8116 width=1119)
Output:["_col0"]
- Filter Operator [FIL_107] (rows=4058 width=1119)
+ Filter Operator [FIL_107] (rows=8116 width=1119)
predicate:((d_year) IN (2000, 2001, 2002) and d_date_sk is not null and d_dom BETWEEN 1 AND 2)
TableScan [TS_6] (rows=73049 width=1119)
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_dom"]
@@ -160,7 +160,7 @@ Stage-0
SHUFFLE [RS_112]
Group By Operator [GBY_111] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_110] (rows=4058 width=1119)
+ Select Operator [SEL_110] (rows=8116 width=1119)
Output:["_col0"]
Please refer to the previous Select Operator [SEL_108]
<-Reducer 13 [BROADCAST_EDGE] vectorized
@@ -182,7 +182,7 @@ Stage-0
SHUFFLE [RS_122]
Group By Operator [GBY_121] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_120] (rows=852 width=1910)
+ Select Operator [SEL_120] (rows=1704 width=1910)
Output:["_col0"]
Please refer to the previous Select Operator [SEL_118]
<-Reducer 4 [BROADCAST_EDGE] vectorized
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/tez/query74.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query74.q.out b/ql/src/test/results/clientpositive/perf/tez/query74.q.out
index 854e6dc..82f6655 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query74.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query74.q.out
@@ -199,9 +199,9 @@ Stage-0
<-Map 19 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_290]
PartitionCols:_col0
- Select Operator [SEL_284] (rows=18262 width=1119)
+ Select Operator [SEL_284] (rows=36524 width=1119)
Output:["_col0","_col1"]
- Filter Operator [FIL_281] (rows=18262 width=1119)
+ Filter Operator [FIL_281] (rows=36524 width=1119)
predicate:((d_year = 2001) and (d_year) IN (2001, 2002) and d_date_sk is not null)
TableScan [TS_65] (rows=73049 width=1119)
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"]
@@ -222,7 +222,7 @@ Stage-0
SHUFFLE [RS_300]
Group By Operator [GBY_296] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_291] (rows=18262 width=1119)
+ Select Operator [SEL_291] (rows=36524 width=1119)
Output:["_col0"]
Please refer to the previous Select Operator [SEL_284]
<-Reducer 26 [BROADCAST_EDGE] vectorized
@@ -266,9 +266,9 @@ Stage-0
<-Map 19 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_292]
PartitionCols:_col0
- Select Operator [SEL_285] (rows=18262 width=1119)
+ Select Operator [SEL_285] (rows=36524 width=1119)
Output:["_col0","_col1"]
- Filter Operator [FIL_282] (rows=18262 width=1119)
+ Filter Operator [FIL_282] (rows=36524 width=1119)
predicate:((d_year = 2001) and (d_year) IN (2001, 2002) and d_date_sk is not null)
Please refer to the previous TableScan [TS_65]
<-Map 11 [SIMPLE_EDGE] vectorized
@@ -288,7 +288,7 @@ Stage-0
SHUFFLE [RS_301]
Group By Operator [GBY_297] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_293] (rows=18262 width=1119)
+ Select Operator [SEL_293] (rows=36524 width=1119)
Output:["_col0"]
Please refer to the previous Select Operator [SEL_285]
<-Reducer 18 [SIMPLE_EDGE] vectorized
@@ -317,9 +317,9 @@ Stage-0
<-Map 19 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_286]
PartitionCols:_col0
- Select Operator [SEL_283] (rows=18262 width=1119)
+ Select Operator [SEL_283] (rows=36524 width=1119)
Output:["_col0","_col1"]
- Filter Operator [FIL_280] (rows=18262 width=1119)
+ Filter Operator [FIL_280] (rows=36524 width=1119)
predicate:((d_year = 2002) and (d_year) IN (2001, 2002) and d_date_sk is not null)
Please refer to the previous TableScan [TS_65]
<-Map 15 [SIMPLE_EDGE] vectorized
@@ -339,7 +339,7 @@ Stage-0
SHUFFLE [RS_298]
Group By Operator [GBY_294] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_287] (rows=18262 width=1119)
+ Select Operator [SEL_287] (rows=36524 width=1119)
Output:["_col0"]
Please refer to the previous Select Operator [SEL_283]
<-Reducer 25 [BROADCAST_EDGE] vectorized
@@ -397,7 +397,7 @@ Stage-0
SHUFFLE [RS_299]
Group By Operator [GBY_295] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_289] (rows=18262 width=1119)
+ Select Operator [SEL_289] (rows=36524 width=1119)
Output:["_col0"]
Please refer to the previous Select Operator [SEL_283]
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/tez/query79.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query79.q.out b/ql/src/test/results/clientpositive/perf/tez/query79.q.out
index 105a739..7b9d48f 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query79.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query79.q.out
@@ -127,9 +127,9 @@ Stage-0
<-Map 10 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_109]
PartitionCols:_col0
- Select Operator [SEL_108] (rows=18262 width=1119)
+ Select Operator [SEL_108] (rows=36524 width=1119)
Output:["_col0"]
- Filter Operator [FIL_107] (rows=18262 width=1119)
+ Filter Operator [FIL_107] (rows=36524 width=1119)
predicate:((d_dow = 1) and (d_year) IN (1998, 1999, 2000) and d_date_sk is not null)
TableScan [TS_6] (rows=73049 width=1119)
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_dow"]
@@ -150,7 +150,7 @@ Stage-0
SHUFFLE [RS_112]
Group By Operator [GBY_111] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_110] (rows=18262 width=1119)
+ Select Operator [SEL_110] (rows=36524 width=1119)
Output:["_col0"]
Please refer to the previous Select Operator [SEL_108]
<-Reducer 13 [BROADCAST_EDGE] vectorized
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/tez/query82.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query82.q.out b/ql/src/test/results/clientpositive/perf/tez/query82.q.out
index bb5a9e9..de5a681 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query82.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query82.q.out
@@ -67,9 +67,9 @@ Stage-0
<-Map 5 [SIMPLE_EDGE] vectorized
PARTITION_ONLY_SHUFFLE [RS_76]
PartitionCols:_col0
- Select Operator [SEL_75] (rows=25666 width=1436)
+ Select Operator [SEL_75] (rows=51333 width=1436)
Output:["_col0","_col1","_col2","_col3"]
- Filter Operator [FIL_74] (rows=25666 width=1436)
+ Filter Operator [FIL_74] (rows=51333 width=1436)
predicate:((i_manufact_id) IN (437, 129, 727, 663) and i_current_price BETWEEN 30 AND 60 and i_item_sk is not null)
TableScan [TS_3] (rows=462000 width=1436)
default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_manufact_id"]
@@ -113,7 +113,7 @@ Stage-0
PARTITION_ONLY_SHUFFLE [RS_79]
Group By Operator [GBY_78] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_77] (rows=25666 width=1436)
+ Select Operator [SEL_77] (rows=51333 width=1436)
Output:["_col0"]
Please refer to the previous Select Operator [SEL_75]
<-Reducer 9 [BROADCAST_EDGE] vectorized
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/tez/query83.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query83.q.out b/ql/src/test/results/clientpositive/perf/tez/query83.q.out
index f766e8d..c33b37c 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query83.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query83.q.out
@@ -214,11 +214,11 @@ Stage-0
<-Map 19 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_378]
PartitionCols:_col0
- Group By Operator [GBY_377] (rows=36525 width=1119)
+ Group By Operator [GBY_377] (rows=73049 width=1119)
Output:["_col0"],keys:_col0
- Select Operator [SEL_376] (rows=36525 width=1119)
+ Select Operator [SEL_376] (rows=73049 width=1119)
Output:["_col0"]
- Filter Operator [FIL_375] (rows=36525 width=1119)
+ Filter Operator [FIL_375] (rows=73049 width=1119)
predicate:((d_date) IN ('1998-01-02', '1998-10-15', '1998-11-10') and d_week_seq is not null)
TableScan [TS_12] (rows=73049 width=1119)
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date","d_week_seq"]
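The pattern across all of these golden-file updates is the same: wherever a `Filter Operator` applies an `IN` predicate under `Col:NONE` (no column statistics), the old estimator cut the parent row count roughly in half per `IN` clause, while the fixed estimator keeps the parent estimate because nothing is known about the column's value distribution. The sketch below is a hypothetical, heavily simplified model of that behavior written for illustration only; `estimate_in_rows_old` and `estimate_in_rows_fixed` are invented names and this is not Hive's actual `FilterStatsRule` code.

```python
# Hypothetical sketch of the row-count behavior visible in these diffs
# (e.g. store: 1704 -> 852 before the fix, 1704 -> 1704 after it).
# Assumption: with "Column stats: NONE", the old rule treated an IN
# predicate like a generic filter and halved the estimate; the fix
# declines to reduce when there is no statistical basis to do so.

def estimate_in_rows_old(parent_rows: int) -> int:
    # old behavior: generic 50% selectivity applied per IN predicate
    return parent_rows // 2

def estimate_in_rows_fixed(parent_rows: int) -> int:
    # fixed behavior: no column stats, so keep the parent estimate
    return parent_rows

if __name__ == "__main__":
    store_rows = 1704  # TableScan [TS_12] (rows=1704) from query68/query73
    print(estimate_in_rows_old(store_rows))    # the old "-" estimate
    print(estimate_in_rows_fixed(store_rows))  # the new "+" estimate
```

This also explains the larger jumps such as 462000 -> 115500 in the old query13 plan: several stacked `IN` predicates each contributed another halving, compounding the underestimate that HIVE-19097 removes.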
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
index ba004e9..edbe8bd 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
@@ -6775,10 +6775,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date_hour_n2
- filterExpr: (((date = '2008-04-08') or (date = '2008-04-09')) and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean)
+ filterExpr: ((date) IN ('2008-04-08', '2008-04-09') and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean)
Statistics: Num rows: 4 Data size: 2944 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((date = '2008-04-08') or (date = '2008-04-09')) and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean)
+ predicate: ((UDFToDouble(hour) = 11.0D) and (date) IN ('2008-04-08', '2008-04-09') and ds is not null and hr is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string), hr (type: string)
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out b/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
index 54d5be0..aa4d888 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
@@ -275,7 +275,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterTimestampColumnInList(col 0:timestamp, values [0001-01-02 16:00:00.0, 0002-02-03 16:00:00.0])
predicate: (ts) IN (TIMESTAMP'0001-01-01 00:00:00', TIMESTAMP'0002-02-02 00:00:00') (type: boolean)
- Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ts (type: timestamp)
outputColumnNames: _col0
@@ -283,13 +283,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0]
- Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/mapjoin47.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/mapjoin47.q.out b/ql/src/test/results/clientpositive/mapjoin47.q.out
index cf29fa0..172d160 100644
--- a/ql/src/test/results/clientpositive/mapjoin47.q.out
+++ b/ql/src/test/results/clientpositive/mapjoin47.q.out
@@ -707,7 +707,7 @@ STAGE PLANS:
1
outputColumnNames: _col0, _col1, _col2, _col3
residual filter predicates: {(struct(_col0,_col2)) IN (const struct(100,100), const struct(101,101), const struct(102,102))}
- Statistics: Num rows: 3125 Data size: 60200 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9375 Data size: 180600 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 10
Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out
index 01f8951..d352509 100644
--- a/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out
+++ b/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out
@@ -30627,17 +30627,17 @@ STAGE PLANS:
Filter Operator
isSamplingPred: false
predicate: (struct(cint,cfloat)) IN (const struct(49,3.5), const struct(47,2.09), const struct(45,3.02)) (type: boolean)
- Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9216 Data size: 110592 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9216 Data size: 110592 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9216 Data size: 110592 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -30856,19 +30856,19 @@ STAGE PLANS:
Filter Operator
isSamplingPred: false
predicate: (cstring1) IN ('biology', 'history', 'topology') (type: boolean)
- Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
keys: cstring1 (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
null sort order: a
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
tag: -1
value expressions: _col1 (type: bigint)
auto parallelism: false
@@ -30933,11 +30933,11 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: bigint), _col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
@@ -30966,7 +30966,7 @@ STAGE PLANS:
key expressions: _col1 (type: string)
null sort order: a
sort order: +
- Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
tag: -1
value expressions: _col0 (type: bigint)
auto parallelism: false
@@ -31003,13 +31003,13 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/pcr.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/pcr.q.out b/ql/src/test/results/clientpositive/pcr.q.out
index 919b712..68a58bd 100644
--- a/ql/src/test/results/clientpositive/pcr.q.out
+++ b/ql/src/test/results/clientpositive/pcr.q.out
@@ -1460,11 +1460,6 @@ PREHOOK: query: explain extended select key, value from pcr_t1 where (ds='2000-0
PREHOOK: type: QUERY
POSTHOOK: query: explain extended select key, value from pcr_t1 where (ds='2000-04-08' or ds='2000-04-09') and key=14 order by key, value
POSTHOOK: type: QUERY
-OPTIMIZED SQL: SELECT CAST(14 AS INTEGER) AS `key`, `value`
-FROM (SELECT `value`
-FROM `default`.`pcr_t1`
-WHERE (`ds` = '2000-04-08' OR `ds` = '2000-04-09') AND `key` = 14
-ORDER BY `value`) AS `t1`
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -1475,7 +1470,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: pcr_t1
- filterExpr: (((ds = '2000-04-08') or (ds = '2000-04-09')) and (key = 14)) (type: boolean)
+ filterExpr: ((ds) IN ('2000-04-08', '2000-04-09') and (key = 14)) (type: boolean)
Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
@@ -1647,10 +1642,6 @@ PREHOOK: query: explain extended select key, value from pcr_t1 where ds='2000-04
PREHOOK: type: QUERY
POSTHOOK: query: explain extended select key, value from pcr_t1 where ds='2000-04-08' or ds='2000-04-09' order by key, value
POSTHOOK: type: QUERY
-OPTIMIZED SQL: SELECT `key`, `value`
-FROM `default`.`pcr_t1`
-WHERE `ds` = '2000-04-08' OR `ds` = '2000-04-09'
-ORDER BY `key`, `value`
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -1661,7 +1652,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: pcr_t1
- filterExpr: ((ds = '2000-04-08') or (ds = '2000-04-09')) (type: boolean)
+ filterExpr: (ds) IN ('2000-04-08', '2000-04-09') (type: boolean)
Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Select Operator
@@ -2162,10 +2153,6 @@ PREHOOK: query: explain extended select key, value, ds from pcr_t1 where (ds='20
PREHOOK: type: QUERY
POSTHOOK: query: explain extended select key, value, ds from pcr_t1 where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2) order by key, value, ds
POSTHOOK: type: QUERY
-OPTIMIZED SQL: SELECT `key`, `value`, `ds`
-FROM `default`.`pcr_t1`
-WHERE `ds` = '2000-04-08' AND `key` = 1 OR `ds` = '2000-04-09' AND `key` = 2
-ORDER BY `key`, `value`, `ds`
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -2176,22 +2163,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: pcr_t1
- filterExpr: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) (type: boolean)
+ filterExpr: ((struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) and (struct(ds)) IN (struct('2000-04-08'), struct('2000-04-09'))) (type: boolean)
Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) (type: boolean)
- Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+ predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: int), value (type: string), ds (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
null sort order: aaa
sort order: +++
- Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
tag: -1
auto parallelism: false
Execution mode: vectorized
@@ -2302,13 +2289,13 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -4919,11 +4906,6 @@ PREHOOK: query: explain extended select key, value, ds, hr from srcpart where ds
PREHOOK: type: QUERY
POSTHOOK: query: explain extended select key, value, ds, hr from srcpart where ds='2008-04-08' and (hr='11' or hr='12') and key=11 order by key, ds, hr
POSTHOOK: type: QUERY
-OPTIMIZED SQL: SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds`, `hr`
-FROM (SELECT `key`, `value`, `hr`
-FROM `default`.`srcpart`
-WHERE `ds` = '2008-04-08' AND (`hr` = '11' OR `hr` = '12') AND `key` = 11
-ORDER BY `key`, `hr`) AS `t1`
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -4934,7 +4916,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ((ds = '2008-04-08') and ((hr = '11') or (hr = '12')) and (UDFToDouble(key) = 11.0D)) (type: boolean)
+ filterExpr: ((hr) IN ('11', '12') and (ds = '2008-04-08') and (UDFToDouble(key) = 11.0D)) (type: boolean)
Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
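[Editorial note, not part of the diff] The pcr_t1 hunks above show the core of this change: the disjunction `(ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2)` is folded into a single multi-column predicate `(struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09'))`, and the row estimate drops from 40 to 20 instead of staying at the raw scan count. A common way to estimate such a predicate is to assume column independence and charge each constant tuple 1/(NDV_1 * ... * NDV_k) of the rows. The sketch below is an illustration of that textbook formula only, not Hive's actual estimator; the NDV values (2 for `key`, 2 for `ds`) are assumptions chosen to reproduce the 40 -> 20 step visible in the plan.

```python
def in_selectivity(ndv_per_column, num_tuples):
    """Selectivity of (struct(c1,..,ck)) IN (t1,..,tn) under a
    column-independence assumption: each constant tuple matches
    1 / (ndv_1 * ... * ndv_k) of the rows, capped at 1.0."""
    per_tuple = 1.0
    for ndv in ndv_per_column:
        per_tuple /= ndv
    return min(1.0, num_tuples * per_tuple)

# 40 scanned rows, assumed NDV of 2 for key and 2 for ds,
# two constant tuples: 40 * 2/(2*2) = 20 rows, as in the plan above.
estimated_rows = round(40 * in_selectivity([2, 2], 2))
```

The point of the struct-IN form is that the estimator sees one predicate over the joint value space rather than two correlated AND/OR branches, which is what previously led to the inaccurate estimates named in the commit title.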
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/pcs.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/pcs.q.out b/ql/src/test/results/clientpositive/pcs.q.out
index c8819cc..5cfc093 100644
--- a/ql/src/test/results/clientpositive/pcs.q.out
+++ b/ql/src/test/results/clientpositive/pcs.q.out
@@ -764,17 +764,17 @@ STAGE PLANS:
Filter Operator
isSamplingPred: false
predicate: (struct(_col2,_col0,_col8)) IN (const struct('2000-04-08',1,'2000-04-09'), const struct('2000-04-09',2,'2000-04-08')) (type: boolean)
- Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col2 (type: string), _col6 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query10.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query10.q.out b/ql/src/test/results/clientpositive/perf/spark/query10.q.out
index 45dfc53..b7faa9a 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query10.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query10.q.out
@@ -202,7 +202,7 @@ STAGE PLANS:
Edges:
Reducer 12 <- Map 11 (GROUP, 169)
Reducer 15 <- Map 14 (GROUP, 336)
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 697), Map 7 (PARTITION-LEVEL SORT, 697)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 855), Map 7 (PARTITION-LEVEL SORT, 855)
Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 597), Reducer 2 (PARTITION-LEVEL SORT, 597)
Reducer 4 <- Map 9 (PARTITION-LEVEL SORT, 1009), Reducer 12 (PARTITION-LEVEL SORT, 1009), Reducer 15 (PARTITION-LEVEL SORT, 1009), Reducer 3 (PARTITION-LEVEL SORT, 1009)
Reducer 5 <- Reducer 4 (GROUP, 1009)
@@ -309,16 +309,16 @@ STAGE PLANS:
Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((ca_county) IN ('Walker County', 'Richland County', 'Gaines County', 'Douglas County', 'Dona Ana County') and ca_address_sk is not null) (type: boolean)
- Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ca_address_sk (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Map 8
Map Operator Tree:
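[Editorial note, not part of the diff] The query10 hunk above shows the other half of the fix: with `Column stats: NONE`, the `ca_county IN (...)` filter used to be assigned a generic one-half selectivity (40000000 -> 20000000 rows); after the patch the estimator keeps the full row count rather than guess. The same pattern recurs in the query12, query16, query17, and query18 hunks below. The following sketch is an illustrative model of that before/after behavior only; it is not Hive's code, and the generic one-half default for non-IN predicates is an assumption carried over from the old behavior for contrast.

```python
def estimate_filter_rows(num_rows, predicate_is_in, after_patch):
    """Illustrative row-count estimate for a Filter Operator when no
    column statistics are available (Column stats: NONE).
    Before the patch, IN predicates got the generic 1/2 selectivity;
    after it, the estimator declines to halve them blindly."""
    if predicate_is_in and after_patch:
        return num_rows        # keep full rows rather than guess
    return num_rows // 2       # generic default selectivity of 1/2

before = estimate_filter_rows(40000000, True, after_patch=False)
after = estimate_filter_rows(40000000, True, after_patch=True)
```

This is why most `Statistics:` deltas in this diff move upward: under missing stats an overconfident low estimate can starve downstream operators of reducers, so the conservative full count (with the matching reducer-count changes, e.g. 697 -> 855) is the safer default.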
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query12.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query12.q.out b/ql/src/test/results/clientpositive/perf/spark/query12.q.out
index ad7e912..413930c 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query12.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query12.q.out
@@ -96,7 +96,7 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 171), Map 7 (PARTITION-LEVEL SORT, 171)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 174), Map 7 (PARTITION-LEVEL SORT, 174)
Reducer 3 <- Reducer 2 (GROUP, 186)
Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 93)
Reducer 5 <- Reducer 4 (SORT, 1)
@@ -142,16 +142,16 @@ STAGE PLANS:
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((i_category) IN ('Jewelry', 'Sports', 'Books') and i_item_sk is not null) (type: boolean)
- Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: i_item_sk (type: int), i_item_id (type: string), i_item_desc (type: string), i_current_price (type: decimal(7,2)), i_class (type: string), i_category (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: decimal(7,2)), _col4 (type: string), _col5 (type: string)
Execution mode: vectorized
Reducer 2
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query13.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query13.q.out b/ql/src/test/results/clientpositive/perf/spark/query13.q.out
index fb2a061..c9fcb88 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query13.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query13.q.out
@@ -109,22 +109,22 @@ STAGE PLANS:
Spark
#### A masked pattern was here ####
Vertices:
- Map 8
+ Map 10
Map Operator Tree:
TableScan
- alias: household_demographics
- filterExpr: (((hd_dep_count = 3) or (hd_dep_count = 1)) and hd_demo_sk is not null) (type: boolean)
- Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+ alias: store
+ filterExpr: s_store_sk is not null (type: boolean)
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((hd_dep_count = 3) or (hd_dep_count = 1)) and hd_demo_sk is not null) (type: boolean)
- Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+ predicate: s_store_sk is not null (type: boolean)
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: hd_demo_sk (type: int), hd_dep_count (type: int)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+ expressions: s_store_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
- 0 _col3 (type: int)
+ 0 _col4 (type: int)
1 _col0 (type: int)
Execution mode: vectorized
Local Work:
@@ -134,23 +134,23 @@ STAGE PLANS:
Spark
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 8
Map Operator Tree:
TableScan
- alias: store
- filterExpr: s_store_sk is not null (type: boolean)
- Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ alias: household_demographics
+ filterExpr: ((hd_dep_count) IN (3, 1) and hd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: s_store_sk is not null (type: boolean)
- Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((hd_dep_count) IN (3, 1) and hd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: s_store_sk (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ expressions: hd_demo_sk (type: int), hd_dep_count (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
- 0 _col0 (type: int)
- 1 _col4 (type: int)
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
Execution mode: vectorized
Local Work:
Map Reduce Local Work
@@ -158,65 +158,33 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 49), Map 7 (PARTITION-LEVEL SORT, 49)
- Reducer 4 <- Map 9 (PARTITION-LEVEL SORT, 138), Reducer 3 (PARTITION-LEVEL SORT, 138)
- Reducer 5 <- Map 10 (PARTITION-LEVEL SORT, 17), Reducer 4 (PARTITION-LEVEL SORT, 17)
- Reducer 6 <- Reducer 5 (GROUP, 1)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 133), Map 6 (PARTITION-LEVEL SORT, 133)
+ Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 152), Reducer 2 (PARTITION-LEVEL SORT, 152)
+ Reducer 4 <- Map 9 (PARTITION-LEVEL SORT, 166), Reducer 3 (PARTITION-LEVEL SORT, 166)
+ Reducer 5 <- Reducer 4 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
- Map 10
- Map Operator Tree:
- TableScan
- alias: customer_demographics
- filterExpr: (((cd_marital_status = 'M') or (cd_marital_status = 'D') or (cd_marital_status = 'U')) and ((cd_education_status = '4 yr Degree') or (cd_education_status = 'Primary') or (cd_education_status = 'Advanced Degree')) and cd_demo_sk is not null) (type: boolean)
- Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (((cd_education_status = '4 yr Degree') or (cd_education_status = 'Primary') or (cd_education_status = 'Advanced Degree')) and ((cd_marital_status = 'M') or (cd_marital_status = 'D') or (cd_marital_status = 'U')) and cd_demo_sk is not null) (type: boolean)
- Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cd_demo_sk (type: int), cd_marital_status (type: string), cd_education_status (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string), _col2 (type: string)
- Execution mode: vectorized
- Map 2
+ Map 1
Map Operator Tree:
TableScan
alias: store_sales
- filterExpr: ((ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_net_profit BETWEEN 100 AND 200 or ss_net_profit BETWEEN 150 AND 300 or ss_net_profit BETWEEN 50 AND 250) and ss_store_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_addr_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ filterExpr: ((ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and ss_store_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_addr_sk is not null and ss_sold_date_sk is not null) (type: boolean)
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((ss_net_profit BETWEEN 100 AND 200 or ss_net_profit BETWEEN 150 AND 300 or ss_net_profit BETWEEN 50 AND 250) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
- Statistics: Num rows: 63999513 Data size: 5646055611 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ss_sold_date_sk (type: int), ss_cdemo_sk (type: int), ss_hdemo_sk (type: int), ss_addr_sk (type: int), ss_store_sk (type: int), ss_quantity (type: int), ss_sales_price (type: decimal(7,2)), ss_ext_sales_price (type: decimal(7,2)), ss_ext_wholesale_cost (type: decimal(7,2)), ss_net_profit (type: decimal(7,2))
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
- Statistics: Num rows: 63999513 Data size: 5646055611 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col4 (type: int)
- outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9, _col10
- input vertices:
- 0 Map 1
- Statistics: Num rows: 70399465 Data size: 6210661306 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: int)
- sort order: +
- Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 70399465 Data size: 6210661306 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col6 (type: int), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2))
+ Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2))
Execution mode: vectorized
- Local Work:
- Map Reduce Local Work
- Map 7
+ Map 6
Map Operator Tree:
TableScan
alias: date_dim
@@ -235,26 +203,62 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: customer_demographics
+ filterExpr: ((cd_marital_status) IN ('M', 'D', 'U') and (cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and cd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cd_demo_sk (type: int), cd_marital_status (type: string), cd_education_status (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string)
+ Execution mode: vectorized
Map 9
Map Operator Tree:
TableScan
alias: customer_address
- filterExpr: ((ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and (ca_country = 'United States') and ca_address_sk is not null) (type: boolean)
+ filterExpr: ((ca_country = 'United States') and ca_address_sk is not null) (type: boolean)
Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null) (type: boolean)
- Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((ca_country = 'United States') and ca_address_sk is not null) (type: boolean)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ca_address_sk (type: int), ca_state (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 211198404 Data size: 18631984502 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 211198404 Data size: 18631984502 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2))
Reducer 3
Local Work:
Map Reduce Local Work
@@ -265,62 +269,54 @@ STAGE PLANS:
keys:
0 _col1 (type: int)
1 _col0 (type: int)
- outputColumnNames: _col2, _col3, _col4, _col6, _col7, _col8, _col9, _col10
- Statistics: Num rows: 77439413 Data size: 6831727584 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col13, _col14
+ Statistics: Num rows: 232318249 Data size: 20495183396 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col3 (type: int)
+ 0 _col2 (type: int)
1 _col0 (type: int)
- outputColumnNames: _col2, _col4, _col6, _col7, _col8, _col9, _col10, _col14
+ outputColumnNames: _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col13, _col14, _col16
input vertices:
1 Map 8
- Statistics: Num rows: 85183356 Data size: 7514900505 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col4 (type: int)
- sort order: +
- Map-reduce partition columns: _col4 (type: int)
- Statistics: Num rows: 85183356 Data size: 7514900505 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: int), _col6 (type: int), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col14 (type: int)
+ Statistics: Num rows: 255550079 Data size: 22544702224 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((_col13 = 'D') and (_col14 = 'Primary') and _col6 BETWEEN 50 AND 100 and (_col16 = 1)) or ((_col13 = 'M') and (_col14 = '4 yr Degree') and _col6 BETWEEN 100 AND 150 and (_col16 = 3)) or ((_col13 = 'U') and (_col14 = 'Advanced Degree') and _col6 BETWEEN 150 AND 200 and (_col16 = 1))) (type: boolean)
+ Statistics: Num rows: 10647918 Data size: 939362419 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col3 (type: int)
+ Statistics: Num rows: 10647918 Data size: 939362419 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col4 (type: int), _col5 (type: int), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2))
Reducer 4
+ Local Work:
+ Map Reduce Local Work
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col4 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col2, _col6, _col7, _col8, _col9, _col10, _col14, _col16
- Statistics: Num rows: 93701693 Data size: 8266390734 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (((_col16) IN ('KY', 'GA', 'NM') and _col10 BETWEEN 100 AND 200) or ((_col16) IN ('MT', 'OR', 'IN') and _col10 BETWEEN 150 AND 300) or ((_col16) IN ('WI', 'MO', 'WV') and _col10 BETWEEN 50 AND 250)) (type: boolean)
- Statistics: Num rows: 15616947 Data size: 1377731627 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col2 (type: int)
- sort order: +
- Map-reduce partition columns: _col2 (type: int)
- Statistics: Num rows: 15616947 Data size: 1377731627 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col6 (type: int), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col14 (type: int)
- Reducer 5
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col2 (type: int)
+ 0 _col3 (type: int)
1 _col0 (type: int)
- outputColumnNames: _col6, _col7, _col8, _col9, _col14, _col19, _col20
- Statistics: Num rows: 17178642 Data size: 1515504822 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col4, _col5, _col7, _col8, _col9, _col18
+ Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((_col19 = 'D') and (_col20 = 'Primary') and _col7 BETWEEN 50 AND 100 and (_col14 = 1)) or ((_col19 = 'M') and (_col20 = '4 yr Degree') and _col7 BETWEEN 100 AND 150 and (_col14 = 3)) or ((_col19 = 'U') and (_col20 = 'Advanced Degree') and _col7 BETWEEN 150 AND 200 and (_col14 = 1))) (type: boolean)
- Statistics: Num rows: 715776 Data size: 63145968 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col6 (type: int), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2))
- outputColumnNames: _col6, _col8, _col9
- Statistics: Num rows: 715776 Data size: 63145968 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((((_col18 = 'KY') or (_col18 = 'GA') or (_col18 = 'NM')) and _col9 BETWEEN 100 AND 200) or (((_col18 = 'MT') or (_col18 = 'OR') or (_col18 = 'IN')) and _col9 BETWEEN 150 AND 300) or (((_col18 = 'WI') or (_col18 = 'MO') or (_col18 = 'WV')) and _col9 BETWEEN 50 AND 250)) (type: boolean)
+ Statistics: Num rows: 7333332 Data size: 7442451276 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col4 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col5, _col7, _col8
+ input vertices:
+ 1 Map 10
+ Statistics: Num rows: 8066665 Data size: 8186696581 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: sum(_col6), count(_col6), sum(_col8), count(_col8), sum(_col9), count(_col9)
+ aggregations: sum(_col5), count(_col5), sum(_col7), count(_col7), sum(_col8), count(_col8)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE
@@ -328,7 +324,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(17,2)), _col3 (type: bigint), _col4 (type: decimal(17,2)), _col5 (type: bigint)
- Reducer 6
+ Reducer 5
Execution mode: vectorized
Reduce Operator Tree:
Group By Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query15.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query15.q.out b/ql/src/test/results/clientpositive/perf/spark/query15.q.out
index 3d6fbda..67684f6 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query15.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query15.q.out
@@ -157,7 +157,7 @@ STAGE PLANS:
outputColumnNames: _col3, _col4, _col7
Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((_col3) IN ('CA', 'WA', 'GA') or (_col7 > 500) or (substr(_col4, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792')) (type: boolean)
+ predicate: ((_col3 = 'CA') or (_col3 = 'GA') or (_col3 = 'WA') or (_col7 > 500) or (substr(_col4, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792')) (type: boolean)
Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col4 (type: string), _col7 (type: decimal(7,2))
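[Editorial note, not part of the diff] The query15 hunk above moves in the opposite direction: the three-element `(_col3) IN ('CA', 'WA', 'GA')` is expanded back into plain equality disjuncts, while the nine-element `substr(...) IN (...)` list is left alone. That is consistent with a size threshold below which an IN list is kept as OR-of-equalities. The sketch below illustrates such a threshold-based normalization; the threshold value of 4 is purely an assumption chosen to match this one plan, and the function name is hypothetical, not a Hive API.

```python
def normalize_disjunction(equalities_by_column, min_in_size=4):
    """Illustrative rewrite: collapse OR-of-equalities on a column into
    an IN list only when the value list reaches min_in_size; smaller
    groups stay as plain equality disjuncts, as in the query15 plan."""
    parts = []
    for col, values in equalities_by_column.items():
        if len(values) >= min_in_size:
            parts.append(f"({col}) IN ({', '.join(values)})")
        else:
            parts.extend(f"({col} = {v})" for v in values)
    return " or ".join(parts)
```

Keeping short lists as equality chains lets other rules (constant folding, partition pruning) match them directly, while long lists still benefit from the compact IN form.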
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query16.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query16.q.out b/ql/src/test/results/clientpositive/perf/spark/query16.q.out
index 2f51a71..b5adc85 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query16.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query16.q.out
@@ -77,11 +77,11 @@ STAGE PLANS:
Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((cc_county) IN ('Ziebach County', 'Levy County', 'Huron County', 'Franklin Parish', 'Daviess County') and cc_call_center_sk is not null) (type: boolean)
- Statistics: Num rows: 30 Data size: 61350 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cc_call_center_sk (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 30 Data size: 61350 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
0 _col2 (type: int)
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query17.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query17.q.out b/ql/src/test/results/clientpositive/perf/spark/query17.q.out
index 23f1e85..35405a7 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query17.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query17.q.out
@@ -158,16 +158,16 @@ STAGE PLANS:
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) (type: boolean)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Map 13
Map Operator Tree:
@@ -197,16 +197,16 @@ STAGE PLANS:
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) (type: boolean)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Map 7
Map Operator Tree:
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query18.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query18.q.out b/ql/src/test/results/clientpositive/perf/spark/query18.q.out
index f8bec59..3ebd215 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query18.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query18.q.out
@@ -75,9 +75,9 @@ STAGE PLANS:
Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 306), Map 9 (PARTITION-LEVEL SORT, 306)
Reducer 11 <- Map 14 (PARTITION-LEVEL SORT, 337), Reducer 10 (PARTITION-LEVEL SORT, 337)
Reducer 12 <- Map 15 (PARTITION-LEVEL SORT, 374), Reducer 11 (PARTITION-LEVEL SORT, 374)
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 428), Map 7 (PARTITION-LEVEL SORT, 428)
- Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 302), Reducer 2 (PARTITION-LEVEL SORT, 302)
- Reducer 4 <- Reducer 12 (PARTITION-LEVEL SORT, 731), Reducer 3 (PARTITION-LEVEL SORT, 731)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 855), Map 7 (PARTITION-LEVEL SORT, 855)
+ Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 597), Reducer 2 (PARTITION-LEVEL SORT, 597)
+ Reducer 4 <- Reducer 12 (PARTITION-LEVEL SORT, 1009), Reducer 3 (PARTITION-LEVEL SORT, 1009)
Reducer 5 <- Reducer 4 (GROUP, 1009)
Reducer 6 <- Reducer 5 (SORT, 1)
#### A masked pattern was here ####
@@ -90,16 +90,16 @@ STAGE PLANS:
Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((c_birth_month) IN (9, 5, 12, 4, 1, 10) and c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null) (type: boolean)
- Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_addr_sk (type: int), c_birth_year (type: int)
outputColumnNames: _col0, _col1, _col2, _col4
- Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col2 (type: int)
sort order: +
Map-reduce partition columns: _col2 (type: int)
- Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: int), _col1 (type: int), _col4 (type: int)
Execution mode: vectorized
Map 13
@@ -169,16 +169,16 @@ STAGE PLANS:
Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((ca_state) IN ('ND', 'WI', 'AL', 'NC', 'OK', 'MS', 'TN') and ca_address_sk is not null) (type: boolean)
- Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ca_address_sk (type: int), ca_county (type: string), ca_state (type: string), ca_country (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string)
Execution mode: vectorized
Map 8
@@ -281,12 +281,12 @@ STAGE PLANS:
0 _col2 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col4, _col6, _col7, _col8
- Statistics: Num rows: 44000000 Data size: 37840889538 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col1 (type: int)
sort order: +
Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 44000000 Data size: 37840889538 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: int), _col4 (type: int), _col6 (type: string), _col7 (type: string), _col8 (type: string)
Reducer 3
Reduce Operator Tree:
@@ -297,12 +297,12 @@ STAGE PLANS:
0 _col1 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0, _col4, _col6, _col7, _col8
- Statistics: Num rows: 48400001 Data size: 41624979393 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 48400001 Data size: 41624979393 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE
value expressions: _col4 (type: int), _col6 (type: string), _col7 (type: string), _col8 (type: string)
Reducer 4
Reduce Operator Tree:
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query20.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query20.q.out b/ql/src/test/results/clientpositive/perf/spark/query20.q.out
index 76fae0b..86e0e72 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query20.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query20.q.out
@@ -88,7 +88,7 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 338), Map 7 (PARTITION-LEVEL SORT, 338)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 341), Map 7 (PARTITION-LEVEL SORT, 341)
Reducer 3 <- Reducer 2 (GROUP, 369)
Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 185)
Reducer 5 <- Reducer 4 (SORT, 1)
@@ -134,16 +134,16 @@ STAGE PLANS:
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((i_category) IN ('Jewelry', 'Sports', 'Books') and i_item_sk is not null) (type: boolean)
- Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: i_item_sk (type: int), i_item_id (type: string), i_item_desc (type: string), i_current_price (type: decimal(7,2)), i_class (type: string), i_category (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: decimal(7,2)), _col4 (type: string), _col5 (type: string)
Execution mode: vectorized
Reducer 2
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query23.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query23.q.out b/ql/src/test/results/clientpositive/perf/spark/query23.q.out
index 08b0f93..4ccc2df 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query23.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query23.q.out
@@ -150,16 +150,16 @@ STAGE PLANS:
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) (type: boolean)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Map 19
Map Operator Tree:
@@ -208,16 +208,16 @@ STAGE PLANS:
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) (type: boolean)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Map 26
Map Operator Tree:
@@ -450,16 +450,16 @@ STAGE PLANS:
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) (type: boolean)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Map 48
Map Operator Tree:
@@ -508,16 +508,16 @@ STAGE PLANS:
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) (type: boolean)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Map 55
Map Operator Tree:
@@ -755,16 +755,16 @@ STAGE PLANS:
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) (type: boolean)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int), d_date (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Execution mode: vectorized
Map 12
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query27.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query27.q.out b/ql/src/test/results/clientpositive/perf/spark/query27.q.out
index e7ed297..294222e 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query27.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query27.q.out
@@ -60,11 +60,11 @@ STAGE PLANS:
Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((s_state) IN ('SD', 'FL', 'MI', 'LA', 'MO', 'SC') and s_store_sk is not null) (type: boolean)
- Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: s_store_sk (type: int), s_state (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
0 _col3 (type: int)
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query29.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query29.q.out b/ql/src/test/results/clientpositive/perf/spark/query29.q.out
index b070fc0..a734710 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query29.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query29.q.out
@@ -258,16 +258,16 @@ STAGE PLANS:
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((d_year) IN (1999, 2000, 2001) and d_date_sk is not null) (type: boolean)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Map 7
Map Operator Tree:
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query34.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query34.q.out b/ql/src/test/results/clientpositive/perf/spark/query34.q.out
index b40081e..bb2796b 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query34.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query34.q.out
@@ -72,10 +72,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: household_demographics
- filterExpr: (((hd_buy_potential = '>10000') or (hd_buy_potential = 'unknown')) and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.2D)) ELSE (null) END and hd_demo_sk is not null) (type: boolean)
+ filterExpr: ((hd_buy_potential) IN ('>10000', 'unknown') and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.2D)) ELSE (null) END and hd_demo_sk is not null) (type: boolean)
Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((hd_buy_potential = '>10000') or (hd_buy_potential = 'unknown')) and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.2D)) ELSE (null) END and hd_demo_sk is not null) (type: boolean)
+ predicate: ((hd_buy_potential) IN ('>10000', 'unknown') and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.2D)) ELSE (null) END and hd_demo_sk is not null) (type: boolean)
Statistics: Num rows: 1200 Data size: 128400 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: hd_demo_sk (type: int)
@@ -95,11 +95,11 @@ STAGE PLANS:
Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((s_county) IN ('Mobile County', 'Maverick County', 'Huron County', 'Kittitas County', 'Fairfield County', 'Jackson County', 'Barrow County', 'Pennington County') and s_store_sk is not null) (type: boolean)
- Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: s_store_sk (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
0 _col3 (type: int)
@@ -165,16 +165,16 @@ STAGE PLANS:
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((d_dom BETWEEN 1 AND 3 or d_dom BETWEEN 25 AND 28) and (d_year) IN (2000, 2001, 2002) and d_date_sk is not null) (type: boolean)
- Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 16232 Data size: 18163608 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 16232 Data size: 18163608 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 16232 Data size: 18163608 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Reducer 2
Reduce Operator Tree:
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query36.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query36.q.out b/ql/src/test/results/clientpositive/perf/spark/query36.q.out
index d3bea76..694e579 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query36.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query36.q.out
@@ -74,11 +74,11 @@ STAGE PLANS:
Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((s_state) IN ('SD', 'FL', 'MI', 'LA', 'MO', 'SC', 'AL', 'GA') and s_store_sk is not null) (type: boolean)
- Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: s_store_sk (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
0 _col2 (type: int)
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query37.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query37.q.out b/ql/src/test/results/clientpositive/perf/spark/query37.q.out
index 17c85a6..bce0d68 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query37.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query37.q.out
@@ -96,16 +96,16 @@ STAGE PLANS:
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((i_manufact_id) IN (678, 964, 918, 849) and i_current_price BETWEEN 22 AND 52 and i_item_sk is not null) (type: boolean)
- Statistics: Num rows: 25666 Data size: 36863512 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 51333 Data size: 73728460 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: i_item_sk (type: int), i_item_id (type: string), i_item_desc (type: string), i_current_price (type: decimal(7,2))
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 25666 Data size: 36863512 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 51333 Data size: 73728460 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 25666 Data size: 36863512 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 51333 Data size: 73728460 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: decimal(7,2))
Execution mode: vectorized
Map 6
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query45.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query45.q.out b/ql/src/test/results/clientpositive/perf/spark/query45.q.out
index d61f8b8..cac3d05 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query45.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query45.q.out
@@ -57,11 +57,11 @@ STAGE PLANS:
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (i_item_sk) IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) (type: boolean)
- Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: i_item_id (type: string)
outputColumnNames: i_item_id
- Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(), count(i_item_id)
mode: hash
@@ -90,13 +90,13 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 11 <- Map 10 (GROUP, 3)
+ Reducer 11 <- Map 10 (GROUP, 6)
Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 154), Map 14 (PARTITION-LEVEL SORT, 154)
Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 855), Map 6 (PARTITION-LEVEL SORT, 855)
Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 777), Reducer 9 (PARTITION-LEVEL SORT, 777)
Reducer 4 <- Reducer 3 (GROUP, 230)
Reducer 5 <- Reducer 4 (SORT, 1)
- Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 7), Reducer 11 (PARTITION-LEVEL SORT, 7)
+ Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 8), Reducer 11 (PARTITION-LEVEL SORT, 8)
Reducer 9 <- Reducer 13 (PARTITION-LEVEL SORT, 174), Reducer 8 (PARTITION-LEVEL SORT, 174)
#### A masked pattern was here ####
Vertices:
@@ -128,21 +128,21 @@ STAGE PLANS:
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (i_item_sk) IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) (type: boolean)
- Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: i_item_id (type: string)
outputColumnNames: i_item_id
- Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Group By Operator
keys: i_item_id (type: string)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Map 12
Map Operator Tree:
@@ -230,16 +230,16 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), true (type: boolean)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: boolean)
Reducer 13
Reduce Operator Tree: