You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2018/10/13 18:16:10 UTC
[4/6] hive git commit: HIVE-20704: Extend HivePreFilteringRule to support other functions (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

http://git-wip-us.apache.org/repos/asf/hive/blob/f0b76e24/ql/src/test/results/clientpositive/perf/spark/query88.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query88.q.out b/ql/src/test/results/clientpositive/perf/spark/query88.q.out
index fbc5d93..029da52 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query88.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query88.q.out
@@ -234,19 +234,19 @@ STAGE PLANS:
         Map 8 
             Map Operator Tree:
                 TableScan
-                  alias: time_dim
-                  filterExpr: ((t_hour = 12) and (t_minute < 30) and t_time_sk is not null) (type: boolean)
-                  Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE
+                  alias: household_demographics
+                  filterExpr: ((hd_dep_count) IN (3, 0, 1) and (hd_vehicle_count <= 5) and (((hd_dep_count = 3) and hd_vehicle_count is not null) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean)
+                  Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((t_hour = 12) and (t_minute < 30) and t_time_sk is not null) (type: boolean)
-                    Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((((hd_dep_count = 3) and hd_vehicle_count is not null) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and (hd_dep_count) IN (3, 0, 1) and (hd_vehicle_count <= 5) and hd_demo_sk is not null) (type: boolean)
+                    Statistics: Num rows: 2000 Data size: 214000 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: t_time_sk (type: int)
+                      expressions: hd_demo_sk (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2000 Data size: 214000 Basic stats: COMPLETE Column stats: NONE
                       Spark HashTable Sink Operator
                         keys:
-                          0 _col0 (type: int)
+                          0 _col1 (type: int)
                           1 _col0 (type: int)
             Execution mode: vectorized
             Local Work:
@@ -254,19 +254,19 @@ STAGE PLANS:
         Map 9 
             Map Operator Tree:
                 TableScan
-                  alias: household_demographics
-                  filterExpr: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean)
-                  Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+                  alias: time_dim
+                  filterExpr: ((t_hour = 12) and (t_minute < 30) and t_time_sk is not null) (type: boolean)
+                  Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean)
-                    Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((t_hour = 12) and (t_minute < 30) and t_time_sk is not null) (type: boolean)
+                    Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: hd_demo_sk (type: int)
+                      expressions: t_time_sk (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
                       Spark HashTable Sink Operator
                         keys:
-                          0 _col1 (type: int)
+                          0 _col0 (type: int)
                           1 _col0 (type: int)
             Execution mode: vectorized
             Local Work:
@@ -301,9 +301,9 @@ STAGE PLANS:
                         condition map:
                              Inner Join 0 to 1
                         keys:
-                          0 _col0 (type: int)
+                          0 _col1 (type: int)
                           1 _col0 (type: int)
-                        outputColumnNames: _col1, _col2
+                        outputColumnNames: _col0, _col2
                         input vertices:
                           1 Map 13
                         Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
@@ -311,7 +311,7 @@ STAGE PLANS:
                           condition map:
                                Inner Join 0 to 1
                           keys:
-                            0 _col1 (type: int)
+                            0 _col0 (type: int)
                             1 _col0 (type: int)
                           outputColumnNames: _col2
                           input vertices:
@@ -355,9 +355,9 @@ STAGE PLANS:
                         condition map:
                              Inner Join 0 to 1
                         keys:
-                          0 _col0 (type: int)
+                          0 _col1 (type: int)
                           1 _col0 (type: int)
-                        outputColumnNames: _col1, _col2
+                        outputColumnNames: _col0, _col2
                         input vertices:
                           1 Map 18
                         Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
@@ -365,7 +365,7 @@ STAGE PLANS:
                           condition map:
                                Inner Join 0 to 1
                           keys:
-                            0 _col1 (type: int)
+                            0 _col0 (type: int)
                             1 _col0 (type: int)
                           outputColumnNames: _col2
                           input vertices:
@@ -409,9 +409,9 @@ STAGE PLANS:
                         condition map:
                              Inner Join 0 to 1
                         keys:
-                          0 _col0 (type: int)
+                          0 _col1 (type: int)
                           1 _col0 (type: int)
-                        outputColumnNames: _col1, _col2
+                        outputColumnNames: _col0, _col2
                         input vertices:
                           1 Map 23
                         Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
@@ -419,7 +419,7 @@ STAGE PLANS:
                           condition map:
                                Inner Join 0 to 1
                           keys:
-                            0 _col1 (type: int)
+                            0 _col0 (type: int)
                             1 _col0 (type: int)
                           outputColumnNames: _col2
                           input vertices:
@@ -463,9 +463,9 @@ STAGE PLANS:
                         condition map:
                              Inner Join 0 to 1
                         keys:
-                          0 _col0 (type: int)
+                          0 _col1 (type: int)
                           1 _col0 (type: int)
-                        outputColumnNames: _col1, _col2
+                        outputColumnNames: _col0, _col2
                         input vertices:
                           1 Map 28
                         Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
@@ -473,7 +473,7 @@ STAGE PLANS:
                           condition map:
                                Inner Join 0 to 1
                           keys:
-                            0 _col1 (type: int)
+                            0 _col0 (type: int)
                             1 _col0 (type: int)
                           outputColumnNames: _col2
                           input vertices:
@@ -517,9 +517,9 @@ STAGE PLANS:
                         condition map:
                              Inner Join 0 to 1
                         keys:
-                          0 _col0 (type: int)
+                          0 _col1 (type: int)
                           1 _col0 (type: int)
-                        outputColumnNames: _col1, _col2
+                        outputColumnNames: _col0, _col2
                         input vertices:
                           1 Map 33
                         Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
@@ -527,7 +527,7 @@ STAGE PLANS:
                           condition map:
                                Inner Join 0 to 1
                           keys:
-                            0 _col1 (type: int)
+                            0 _col0 (type: int)
                             1 _col0 (type: int)
                           outputColumnNames: _col2
                           input vertices:
@@ -571,9 +571,9 @@ STAGE PLANS:
                         condition map:
                              Inner Join 0 to 1
                         keys:
-                          0 _col0 (type: int)
+                          0 _col1 (type: int)
                           1 _col0 (type: int)
-                        outputColumnNames: _col1, _col2
+                        outputColumnNames: _col0, _col2
                         input vertices:
                           1 Map 38
                         Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
@@ -581,7 +581,7 @@ STAGE PLANS:
                           condition map:
                                Inner Join 0 to 1
                           keys:
-                            0 _col1 (type: int)
+                            0 _col0 (type: int)
                             1 _col0 (type: int)
                           outputColumnNames: _col2
                           input vertices:
@@ -625,9 +625,9 @@ STAGE PLANS:
                         condition map:
                              Inner Join 0 to 1
                         keys:
-                          0 _col0 (type: int)
+                          0 _col1 (type: int)
                           1 _col0 (type: int)
-                        outputColumnNames: _col1, _col2
+                        outputColumnNames: _col0, _col2
                         input vertices:
                           1 Map 8
                         Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
@@ -635,7 +635,7 @@ STAGE PLANS:
                           condition map:
                                Inner Join 0 to 1
                           keys:
-                            0 _col1 (type: int)
+                            0 _col0 (type: int)
                             1 _col0 (type: int)
                           outputColumnNames: _col2
                           input vertices:
@@ -810,19 +810,19 @@ STAGE PLANS:
         Map 3 
             Map Operator Tree:
                 TableScan
-                  alias: time_dim
-                  filterExpr: ((t_hour = 8) and (t_minute >= 30) and t_time_sk is not null) (type: boolean)
-                  Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE
+                  alias: household_demographics
+                  filterExpr: ((hd_dep_count) IN (3, 0, 1) and (hd_vehicle_count <= 5) and (((hd_dep_count = 3) and hd_vehicle_count is not null) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean)
+                  Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((t_hour = 8) and (t_minute >= 30) and t_time_sk is not null) (type: boolean)
-                    Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((((hd_dep_count = 3) and hd_vehicle_count is not null) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and (hd_dep_count) IN (3, 0, 1) and (hd_vehicle_count <= 5) and hd_demo_sk is not null) (type: boolean)
+                    Statistics: Num rows: 2000 Data size: 214000 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: t_time_sk (type: int)
+                      expressions: hd_demo_sk (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2000 Data size: 214000 Basic stats: COMPLETE Column stats: NONE
                       Spark HashTable Sink Operator
                         keys:
-                          0 _col0 (type: int)
+                          0 _col1 (type: int)
                           1 _col0 (type: int)
             Execution mode: vectorized
             Local Work:
@@ -830,19 +830,19 @@ STAGE PLANS:
         Map 4 
             Map Operator Tree:
                 TableScan
-                  alias: household_demographics
-                  filterExpr: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean)
-                  Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+                  alias: time_dim
+                  filterExpr: ((t_hour = 8) and (t_minute >= 30) and t_time_sk is not null) (type: boolean)
+                  Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean)
-                    Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((t_hour = 8) and (t_minute >= 30) and t_time_sk is not null) (type: boolean)
+                    Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: hd_demo_sk (type: int)
+                      expressions: t_time_sk (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
                       Spark HashTable Sink Operator
                         keys:
-                          0 _col1 (type: int)
+                          0 _col0 (type: int)
                           1 _col0 (type: int)
             Execution mode: vectorized
             Local Work:
@@ -891,9 +891,9 @@ STAGE PLANS:
                         condition map:
                              Inner Join 0 to 1
                         keys:
-                          0 _col0 (type: int)
+                          0 _col1 (type: int)
                           1 _col0 (type: int)
-                        outputColumnNames: _col1, _col2
+                        outputColumnNames: _col0, _col2
                         input vertices:
                           1 Map 3
                         Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
@@ -901,7 +901,7 @@ STAGE PLANS:
                           condition map:
                                Inner Join 0 to 1
                           keys:
-                            0 _col1 (type: int)
+                            0 _col0 (type: int)
                             1 _col0 (type: int)
                           outputColumnNames: _col2
                           input vertices:
@@ -985,19 +985,19 @@ STAGE PLANS:
         Map 13 
             Map Operator Tree:
                 TableScan
-                  alias: time_dim
-                  filterExpr: ((t_hour = 11) and (t_minute >= 30) and t_time_sk is not null) (type: boolean)
-                  Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE
+                  alias: household_demographics
+                  filterExpr: ((hd_dep_count) IN (3, 0, 1) and (hd_vehicle_count <= 5) and (((hd_dep_count = 3) and hd_vehicle_count is not null) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean)
+                  Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((t_hour = 11) and (t_minute >= 30) and t_time_sk is not null) (type: boolean)
-                    Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((((hd_dep_count = 3) and hd_vehicle_count is not null) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and (hd_dep_count) IN (3, 0, 1) and (hd_vehicle_count <= 5) and hd_demo_sk is not null) (type: boolean)
+                    Statistics: Num rows: 2000 Data size: 214000 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: t_time_sk (type: int)
+                      expressions: hd_demo_sk (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2000 Data size: 214000 Basic stats: COMPLETE Column stats: NONE
                       Spark HashTable Sink Operator
                         keys:
-                          0 _col0 (type: int)
+                          0 _col1 (type: int)
                           1 _col0 (type: int)
             Execution mode: vectorized
             Local Work:
@@ -1005,19 +1005,19 @@ STAGE PLANS:
         Map 14 
             Map Operator Tree:
                 TableScan
-                  alias: household_demographics
-                  filterExpr: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean)
-                  Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+                  alias: time_dim
+                  filterExpr: ((t_hour = 11) and (t_minute >= 30) and t_time_sk is not null) (type: boolean)
+                  Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean)
-                    Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((t_hour = 11) and (t_minute >= 30) and t_time_sk is not null) (type: boolean)
+                    Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: hd_demo_sk (type: int)
+                      expressions: t_time_sk (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
                       Spark HashTable Sink Operator
                         keys:
-                          0 _col1 (type: int)
+                          0 _col0 (type: int)
                           1 _col0 (type: int)
             Execution mode: vectorized
             Local Work:
@@ -1050,19 +1050,19 @@ STAGE PLANS:
         Map 18 
             Map Operator Tree:
                 TableScan
-                  alias: time_dim
-                  filterExpr: ((t_hour = 11) and (t_minute < 30) and t_time_sk is not null) (type: boolean)
-                  Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE
+                  alias: household_demographics
+                  filterExpr: ((hd_dep_count) IN (3, 0, 1) and (hd_vehicle_count <= 5) and (((hd_dep_count = 3) and hd_vehicle_count is not null) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean)
+                  Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((t_hour = 11) and (t_minute < 30) and t_time_sk is not null) (type: boolean)
-                    Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((((hd_dep_count = 3) and hd_vehicle_count is not null) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and (hd_dep_count) IN (3, 0, 1) and (hd_vehicle_count <= 5) and hd_demo_sk is not null) (type: boolean)
+                    Statistics: Num rows: 2000 Data size: 214000 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: t_time_sk (type: int)
+                      expressions: hd_demo_sk (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2000 Data size: 214000 Basic stats: COMPLETE Column stats: NONE
                       Spark HashTable Sink Operator
                         keys:
-                          0 _col0 (type: int)
+                          0 _col1 (type: int)
                           1 _col0 (type: int)
             Execution mode: vectorized
             Local Work:
@@ -1070,19 +1070,19 @@ STAGE PLANS:
         Map 19 
             Map Operator Tree:
                 TableScan
-                  alias: household_demographics
-                  filterExpr: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean)
-                  Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+                  alias: time_dim
+                  filterExpr: ((t_hour = 11) and (t_minute < 30) and t_time_sk is not null) (type: boolean)
+                  Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean)
-                    Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((t_hour = 11) and (t_minute < 30) and t_time_sk is not null) (type: boolean)
+                    Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: hd_demo_sk (type: int)
+                      expressions: t_time_sk (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
                       Spark HashTable Sink Operator
                         keys:
-                          0 _col1 (type: int)
+                          0 _col0 (type: int)
                           1 _col0 (type: int)
             Execution mode: vectorized
             Local Work:
@@ -1115,19 +1115,19 @@ STAGE PLANS:
         Map 23 
             Map Operator Tree:
                 TableScan
-                  alias: time_dim
-                  filterExpr: ((t_hour = 10) and (t_minute >= 30) and t_time_sk is not null) (type: boolean)
-                  Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE
+                  alias: household_demographics
+                  filterExpr: ((hd_dep_count) IN (3, 0, 1) and (hd_vehicle_count <= 5) and (((hd_dep_count = 3) and hd_vehicle_count is not null) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean)
+                  Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((t_hour = 10) and (t_minute >= 30) and t_time_sk is not null) (type: boolean)
-                    Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((((hd_dep_count = 3) and hd_vehicle_count is not null) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and (hd_dep_count) IN (3, 0, 1) and (hd_vehicle_count <= 5) and hd_demo_sk is not null) (type: boolean)
+                    Statistics: Num rows: 2000 Data size: 214000 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: t_time_sk (type: int)
+                      expressions: hd_demo_sk (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2000 Data size: 214000 Basic stats: COMPLETE Column stats: NONE
                       Spark HashTable Sink Operator
                         keys:
-                          0 _col0 (type: int)
+                          0 _col1 (type: int)
                           1 _col0 (type: int)
             Execution mode: vectorized
             Local Work:
@@ -1135,19 +1135,19 @@ STAGE PLANS:
         Map 24 
             Map Operator Tree:
                 TableScan
-                  alias: household_demographics
-                  filterExpr: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean)
-                  Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+                  alias: time_dim
+                  filterExpr: ((t_hour = 10) and (t_minute >= 30) and t_time_sk is not null) (type: boolean)
+                  Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean)
-                    Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((t_hour = 10) and (t_minute >= 30) and t_time_sk is not null) (type: boolean)
+                    Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: hd_demo_sk (type: int)
+                      expressions: t_time_sk (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
                       Spark HashTable Sink Operator
                         keys:
-                          0 _col1 (type: int)
+                          0 _col0 (type: int)
                           1 _col0 (type: int)
             Execution mode: vectorized
             Local Work:
@@ -1180,19 +1180,19 @@ STAGE PLANS:
         Map 28 
             Map Operator Tree:
                 TableScan
-                  alias: time_dim
-                  filterExpr: ((t_hour = 10) and (t_minute < 30) and t_time_sk is not null) (type: boolean)
-                  Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE
+                  alias: household_demographics
+                  filterExpr: ((hd_dep_count) IN (3, 0, 1) and (hd_vehicle_count <= 5) and (((hd_dep_count = 3) and hd_vehicle_count is not null) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean)
+                  Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((t_hour = 10) and (t_minute < 30) and t_time_sk is not null) (type: boolean)
-                    Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((((hd_dep_count = 3) and hd_vehicle_count is not null) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and (hd_dep_count) IN (3, 0, 1) and (hd_vehicle_count <= 5) and hd_demo_sk is not null) (type: boolean)
+                    Statistics: Num rows: 2000 Data size: 214000 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: t_time_sk (type: int)
+                      expressions: hd_demo_sk (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2000 Data size: 214000 Basic stats: COMPLETE Column stats: NONE
                       Spark HashTable Sink Operator
                         keys:
-                          0 _col0 (type: int)
+                          0 _col1 (type: int)
                           1 _col0 (type: int)
             Execution mode: vectorized
             Local Work:
@@ -1200,19 +1200,19 @@ STAGE PLANS:
         Map 29 
             Map Operator Tree:
                 TableScan
-                  alias: household_demographics
-                  filterExpr: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean)
-                  Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+                  alias: time_dim
+                  filterExpr: ((t_hour = 10) and (t_minute < 30) and t_time_sk is not null) (type: boolean)
+                  Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean)
-                    Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((t_hour = 10) and (t_minute < 30) and t_time_sk is not null) (type: boolean)
+                    Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: hd_demo_sk (type: int)
+                      expressions: t_time_sk (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
                       Spark HashTable Sink Operator
                         keys:
-                          0 _col1 (type: int)
+                          0 _col0 (type: int)
                           1 _col0 (type: int)
             Execution mode: vectorized
             Local Work:
@@ -1245,19 +1245,19 @@ STAGE PLANS:
         Map 33 
             Map Operator Tree:
                 TableScan
-                  alias: time_dim
-                  filterExpr: ((t_hour = 9) and (t_minute >= 30) and t_time_sk is not null) (type: boolean)
-                  Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE
+                  alias: household_demographics
+                  filterExpr: ((hd_dep_count) IN (3, 0, 1) and (hd_vehicle_count <= 5) and (((hd_dep_count = 3) and hd_vehicle_count is not null) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean)
+                  Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((t_hour = 9) and (t_minute >= 30) and t_time_sk is not null) (type: boolean)
-                    Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((((hd_dep_count = 3) and hd_vehicle_count is not null) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and (hd_dep_count) IN (3, 0, 1) and (hd_vehicle_count <= 5) and hd_demo_sk is not null) (type: boolean)
+                    Statistics: Num rows: 2000 Data size: 214000 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: t_time_sk (type: int)
+                      expressions: hd_demo_sk (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2000 Data size: 214000 Basic stats: COMPLETE Column stats: NONE
                       Spark HashTable Sink Operator
                         keys:
-                          0 _col0 (type: int)
+                          0 _col1 (type: int)
                           1 _col0 (type: int)
             Execution mode: vectorized
             Local Work:
@@ -1265,19 +1265,19 @@ STAGE PLANS:
         Map 34 
             Map Operator Tree:
                 TableScan
-                  alias: household_demographics
-                  filterExpr: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean)
-                  Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+                  alias: time_dim
+                  filterExpr: ((t_hour = 9) and (t_minute >= 30) and t_time_sk is not null) (type: boolean)
+                  Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean)
-                    Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((t_hour = 9) and (t_minute >= 30) and t_time_sk is not null) (type: boolean)
+                    Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: hd_demo_sk (type: int)
+                      expressions: t_time_sk (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
                       Spark HashTable Sink Operator
                         keys:
-                          0 _col1 (type: int)
+                          0 _col0 (type: int)
                           1 _col0 (type: int)
             Execution mode: vectorized
             Local Work:
@@ -1310,19 +1310,19 @@ STAGE PLANS:
         Map 38 
             Map Operator Tree:
                 TableScan
-                  alias: time_dim
-                  filterExpr: ((t_hour = 9) and (t_minute < 30) and t_time_sk is not null) (type: boolean)
-                  Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE
+                  alias: household_demographics
+                  filterExpr: ((hd_dep_count) IN (3, 0, 1) and (hd_vehicle_count <= 5) and (((hd_dep_count = 3) and hd_vehicle_count is not null) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean)
+                  Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((t_hour = 9) and (t_minute < 30) and t_time_sk is not null) (type: boolean)
-                    Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((((hd_dep_count = 3) and hd_vehicle_count is not null) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and (hd_dep_count) IN (3, 0, 1) and (hd_vehicle_count <= 5) and hd_demo_sk is not null) (type: boolean)
+                    Statistics: Num rows: 2000 Data size: 214000 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: t_time_sk (type: int)
+                      expressions: hd_demo_sk (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2000 Data size: 214000 Basic stats: COMPLETE Column stats: NONE
                       Spark HashTable Sink Operator
                         keys:
-                          0 _col0 (type: int)
+                          0 _col1 (type: int)
                           1 _col0 (type: int)
             Execution mode: vectorized
             Local Work:
@@ -1330,19 +1330,19 @@ STAGE PLANS:
         Map 39 
             Map Operator Tree:
                 TableScan
-                  alias: household_demographics
-                  filterExpr: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean)
-                  Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+                  alias: time_dim
+                  filterExpr: ((t_hour = 9) and (t_minute < 30) and t_time_sk is not null) (type: boolean)
+                  Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean)
-                    Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((t_hour = 9) and (t_minute < 30) and t_time_sk is not null) (type: boolean)
+                    Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: hd_demo_sk (type: int)
+                      expressions: t_time_sk (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE
                       Spark HashTable Sink Operator
                         keys:
-                          0 _col1 (type: int)
+                          0 _col0 (type: int)
                           1 _col0 (type: int)
             Execution mode: vectorized
             Local Work:

http://git-wip-us.apache.org/repos/asf/hive/blob/f0b76e24/ql/src/test/results/clientpositive/perf/spark/query89.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query89.q.out b/ql/src/test/results/clientpositive/perf/spark/query89.q.out
index 66eb333..589dead 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query89.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query89.q.out
@@ -96,8 +96,8 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398)
-        Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 442), Reducer 2 (PARTITION-LEVEL SORT, 442)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 403), Map 7 (PARTITION-LEVEL SORT, 403)
+        Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 438), Reducer 2 (PARTITION-LEVEL SORT, 438)
         Reducer 4 <- Reducer 3 (GROUP, 529)
         Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 265)
         Reducer 6 <- Reducer 5 (SORT, 1)
@@ -117,51 +117,51 @@ STAGE PLANS:
                       outputColumnNames: _col0, _col1, _col2, _col3
                       Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
-                        key expressions: _col0 (type: int)
+                        key expressions: _col1 (type: int)
                         sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
+                        Map-reduce partition columns: _col1 (type: int)
                         Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
+                        value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
             Execution mode: vectorized
         Map 7 
             Map Operator Tree:
                 TableScan
-                  alias: date_dim
-                  filterExpr: ((d_year = 2000) and d_date_sk is not null) (type: boolean)
-                  Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+                  alias: item
+                  filterExpr: ((i_class) IN ('wallpaper', 'parenting', 'musical', 'womens', 'birdal', 'pants') and (i_category) IN ('Home', 'Books', 'Electronics', 'Shoes', 'Jewelry', 'Men') and (((i_category) IN ('Home', 'Books', 'Electronics') and (i_class) IN ('wallpaper', 'parenting', 'musical')) or ((i_category) IN ('Shoes', 'Jewelry', 'Men') and (i_class) IN ('womens', 'birdal', 'pants'))) and i_item_sk is not null) (type: boolean)
+                  Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((d_year = 2000) and d_date_sk is not null) (type: boolean)
-                    Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((((i_category) IN ('Home', 'Books', 'Electronics') and (i_class) IN ('wallpaper', 'parenting', 'musical')) or ((i_category) IN ('Shoes', 'Jewelry', 'Men') and (i_class) IN ('womens', 'birdal', 'pants'))) and (i_category) IN ('Home', 'Books', 'Electronics', 'Shoes', 'Jewelry', 'Men') and (i_class) IN ('wallpaper', 'parenting', 'musical', 'womens', 'birdal', 'pants') and i_item_sk is not null) (type: boolean)
+                    Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: d_date_sk (type: int), d_moy (type: int)
-                      outputColumnNames: _col0, _col2
-                      Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+                      expressions: i_item_sk (type: int), i_brand (type: string), i_class (type: string), i_category (type: string)
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col2 (type: int)
+                        Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string)
             Execution mode: vectorized
         Map 8 
             Map Operator Tree:
                 TableScan
-                  alias: item
-                  filterExpr: ((((i_category) IN ('Home', 'Books', 'Electronics') and (i_class) IN ('wallpaper', 'parenting', 'musical')) or ((i_category) IN ('Shoes', 'Jewelry', 'Men') and (i_class) IN ('womens', 'birdal', 'pants'))) and i_item_sk is not null) (type: boolean)
-                  Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+                  alias: date_dim
+                  filterExpr: ((d_year = 2000) and d_date_sk is not null) (type: boolean)
+                  Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((((i_category) IN ('Home', 'Books', 'Electronics') and (i_class) IN ('wallpaper', 'parenting', 'musical')) or ((i_category) IN ('Shoes', 'Jewelry', 'Men') and (i_class) IN ('womens', 'birdal', 'pants'))) and i_item_sk is not null) (type: boolean)
-                    Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+                    predicate: ((d_year = 2000) and d_date_sk is not null) (type: boolean)
+                    Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: i_item_sk (type: int), i_brand (type: string), i_class (type: string), i_category (type: string)
-                      outputColumnNames: _col0, _col1, _col2, _col3
-                      Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+                      expressions: d_date_sk (type: int), d_moy (type: int)
+                      outputColumnNames: _col0, _col2
+                      Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string)
+                        Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: int)
             Execution mode: vectorized
         Reducer 2 
             Reduce Operator Tree:
@@ -169,16 +169,16 @@ STAGE PLANS:
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 _col0 (type: int)
+                  0 _col1 (type: int)
                   1 _col0 (type: int)
-                outputColumnNames: _col1, _col2, _col3, _col6
+                outputColumnNames: _col0, _col2, _col3, _col5, _col6, _col7
                 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col1 (type: int)
+                  key expressions: _col0 (type: int)
                   sort order: +
-                  Map-reduce partition columns: _col1 (type: int)
+                  Map-reduce partition columns: _col0 (type: int)
                   Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col2 (type: int), _col3 (type: decimal(7,2)), _col6 (type: int)
+                  value expressions: _col2 (type: int), _col3 (type: decimal(7,2)), _col5 (type: string), _col6 (type: string), _col7 (type: string)
         Reducer 3 
             Local Work:
               Map Reduce Local Work
@@ -187,9 +187,9 @@ STAGE PLANS:
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 _col1 (type: int)
+                  0 _col0 (type: int)
                   1 _col0 (type: int)
-                outputColumnNames: _col2, _col3, _col6, _col8, _col9, _col10
+                outputColumnNames: _col2, _col3, _col5, _col6, _col7, _col10
                 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
                 Map Join Operator
                   condition map:
@@ -197,20 +197,20 @@ STAGE PLANS:
                   keys:
                     0 _col2 (type: int)
                     1 _col0 (type: int)
-                  outputColumnNames: _col3, _col6, _col8, _col9, _col10, _col12, _col13
+                  outputColumnNames: _col3, _col5, _col6, _col7, _col10, _col12, _col13
                   input vertices:
                     1 Map 9
                   Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
                   Group By Operator
                     aggregations: sum(_col3)
-                    keys: _col6 (type: int), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col12 (type: string), _col13 (type: string)
+                    keys: _col5 (type: string), _col6 (type: string), _col7 (type: string), _col10 (type: int), _col12 (type: string), _col13 (type: string)
                     mode: hash
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
                     Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
+                      key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: string), _col5 (type: string)
                       sort order: ++++++
-                      Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
+                      Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: string), _col5 (type: string)
                       Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col6 (type: decimal(17,2))
         Reducer 4 
@@ -218,34 +218,34 @@ STAGE PLANS:
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0)
-                keys: KEY._col0 (type: int), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int), KEY._col4 (type: string), KEY._col5 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
                 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col3 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string)
+                  key expressions: _col2 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string)
                   sort order: ++++
-                  Map-reduce partition columns: _col3 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string)
+                  Map-reduce partition columns: _col2 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string)
                   Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col0 (type: int), _col2 (type: string), _col6 (type: decimal(17,2))
+                  value expressions: _col1 (type: string), _col3 (type: int), _col6 (type: decimal(17,2))
         Reducer 5 
             Execution mode: vectorized
             Reduce Operator Tree:
               Select Operator
-                expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col2 (type: decimal(17,2))
+                expressions: KEY.reducesinkkey1 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col2 (type: decimal(17,2))
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
                 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
                 PTF Operator
                   Function definitions:
                       Input definition
                         input alias: ptf_0
-                        output shape: _col0: int, _col1: string, _col2: string, _col3: string, _col4: string, _col5: string, _col6: decimal(17,2)
+                        output shape: _col0: string, _col1: string, _col2: string, _col3: int, _col4: string, _col5: string, _col6: decimal(17,2)
                         type: WINDOWING
                       Windowing table definition
                         input alias: ptf_1
                         name: windowingtablefunction
-                        order by: _col3 ASC NULLS FIRST, _col1 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST
-                        partition by: _col3, _col1, _col4, _col5
+                        order by: _col2 ASC NULLS FIRST, _col0 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST
+                        partition by: _col2, _col0, _col4, _col5
                         raw input shape:
                         window functions:
                             window function definition
@@ -256,14 +256,14 @@ STAGE PLANS:
                               window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                   Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(17,2))
+                    expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(17,2))
                     outputColumnNames: avg_window_0, _col0, _col1, _col2, _col3, _col4, _col5, _col6
                     Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
                     Filter Operator
                       predicate: CASE WHEN ((avg_window_0 <> 0)) THEN (((abs((_col6 - avg_window_0)) / avg_window_0) > 0.1)) ELSE (null) END (type: boolean)
                       Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE
                       Select Operator
-                        expressions: _col3 (type: string), _col2 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string), _col0 (type: int), _col6 (type: decimal(17,2)), avg_window_0 (type: decimal(21,6)), (_col6 - avg_window_0) (type: decimal(22,6))
+                        expressions: _col2 (type: string), _col1 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string), _col3 (type: int), _col6 (type: decimal(17,2)), avg_window_0 (type: decimal(21,6)), (_col6 - avg_window_0) (type: decimal(22,6))
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
                         Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE
                         Reduce Output Operator