You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2018/10/26 21:12:18 UTC

[68/75] [abbrv] hive git commit: HIVE-20788: Extended SJ reduction may backtrack columns incorrectly when creating filters (Jesus Camacho Rodriguez, reviewed by Deepak Jaiswal)

HIVE-20788: Extended SJ reduction may backtrack columns incorrectly when creating filters (Jesus Camacho Rodriguez, reviewed by Deepak Jaiswal)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3cbc13e9
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3cbc13e9
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3cbc13e9

Branch: refs/heads/master-tez092
Commit: 3cbc13e92b9c22fabf9eac72eaec9352eb9b43d2
Parents: 94d4991
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Mon Oct 22 18:30:18 2018 -0700
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Wed Oct 24 16:11:48 2018 -0700

----------------------------------------------------------------------
 .../hive/ql/ppd/SyntheticJoinPredicate.java     |  17 +-
 .../queries/clientpositive/perf/cbo_query24.q   |   3 +-
 .../test/queries/clientpositive/perf/query24.q  |   3 +-
 .../clientpositive/perf/spark/query24.q.out     | 400 ++++++++---------
 .../clientpositive/perf/tez/cbo_query23.q.out   |   8 +-
 .../clientpositive/perf/tez/cbo_query24.q.out   | 103 ++---
 .../perf/tez/constraints/cbo_query24.q.out      | 101 ++---
 .../perf/tez/constraints/cbo_query6.q.out       |   2 +-
 .../perf/tez/constraints/query18.q.out          | 108 ++---
 .../perf/tez/constraints/query24.q.out          | 436 ++++++++++---------
 .../perf/tez/constraints/query33.q.out          | 202 ++++-----
 .../perf/tez/constraints/query56.q.out          | 236 +++++-----
 .../perf/tez/constraints/query6.q.out           | 132 +++---
 .../perf/tez/constraints/query60.q.out          | 242 +++++-----
 .../perf/tez/constraints/query95.q.out          | 128 +++---
 .../clientpositive/perf/tez/query18.q.out       | 112 ++---
 .../clientpositive/perf/tez/query23.q.out       | 340 +++++++--------
 .../clientpositive/perf/tez/query24.q.out       | 436 ++++++++++---------
 .../clientpositive/perf/tez/query59.q.out       |  74 ++--
 .../clientpositive/perf/tez/query95.q.out       | 180 ++++----
 20 files changed, 1664 insertions(+), 1599 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/3cbc13e9/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java
index 1f533bc..e97e447 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java
@@ -308,7 +308,22 @@ public class SyntheticJoinPredicate extends Transform {
       CommonJoinOperator<JoinDesc> joinOp = (CommonJoinOperator) currentOp;
 
       // 2. Backtrack expression to join output
-      final ExprNodeDesc joinExprNode = ExprNodeDescUtils.backtrack(currentNode, op, joinOp);
+      ExprNodeDesc expr = currentNode;
+      if (currentOp != op) {
+        if (expr instanceof ExprNodeColumnDesc) {
+          // Expression refers to the output of the current operator, but the backtrack methods
+          // work from the input columns, hence we need to do the resolution for the current
+          // operator here. If the operator was already the join, there is nothing to do.
+          if (op.getColumnExprMap() != null) {
+            expr = op.getColumnExprMap().get(((ExprNodeColumnDesc) expr).getColumn());
+          }
+        } else {
+          // TODO: We can extend to other expression types
+          // We are done
+          return true;
+        }
+      }
+      final ExprNodeDesc joinExprNode = ExprNodeDescUtils.backtrack(expr, op, joinOp);
       if (joinExprNode == null || !(joinExprNode instanceof ExprNodeColumnDesc)) {
         // TODO: We can extend to other expression types
         // We are done

http://git-wip-us.apache.org/repos/asf/hive/blob/3cbc13e9/ql/src/test/queries/clientpositive/perf/cbo_query24.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query24.q b/ql/src/test/queries/clientpositive/perf/cbo_query24.q
index 02bcbaf..8994de7 100644
--- a/ql/src/test/queries/clientpositive/perf/cbo_query24.q
+++ b/ql/src/test/queries/clientpositive/perf/cbo_query24.q
@@ -24,7 +24,8 @@ where ss_ticket_number = sr_ticket_number
   and ss_customer_sk = c_customer_sk
   and ss_item_sk = i_item_sk
   and ss_store_sk = s_store_sk
-  and c_birth_country = upper(ca_country)
+  and c_current_addr_sk = ca_address_sk
+  and c_birth_country <> upper(ca_country)
   and s_zip = ca_zip
 and s_market_id=7
 group by c_last_name

http://git-wip-us.apache.org/repos/asf/hive/blob/3cbc13e9/ql/src/test/queries/clientpositive/perf/query24.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/perf/query24.q b/ql/src/test/queries/clientpositive/perf/query24.q
index 007d7ee..b3cdaef 100644
--- a/ql/src/test/queries/clientpositive/perf/query24.q
+++ b/ql/src/test/queries/clientpositive/perf/query24.q
@@ -24,7 +24,8 @@ where ss_ticket_number = sr_ticket_number
   and ss_customer_sk = c_customer_sk
   and ss_item_sk = i_item_sk
   and ss_store_sk = s_store_sk
-  and c_birth_country = upper(ca_country)
+  and c_current_addr_sk = ca_address_sk
+  and c_birth_country <> upper(ca_country)
   and s_zip = ca_zip
 and s_market_id=7
 group by c_last_name

http://git-wip-us.apache.org/repos/asf/hive/blob/3cbc13e9/ql/src/test/results/clientpositive/perf/spark/query24.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query24.q.out b/ql/src/test/results/clientpositive/perf/spark/query24.q.out
index 4e2e8e7..91fe702 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query24.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query24.q.out
@@ -1,4 +1,4 @@
-Warning: Map Join MAPJOIN[104][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[107][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: explain
 with ssales as
 (select c_last_name
@@ -23,7 +23,8 @@ where ss_ticket_number = sr_ticket_number
   and ss_customer_sk = c_customer_sk
   and ss_item_sk = i_item_sk
   and ss_store_sk = s_store_sk
-  and c_birth_country = upper(ca_country)
+  and c_current_addr_sk = ca_address_sk
+  and c_birth_country <> upper(ca_country)
   and s_zip = ca_zip
 and s_market_id=7
 group by c_last_name
@@ -79,7 +80,8 @@ where ss_ticket_number = sr_ticket_number
   and ss_customer_sk = c_customer_sk
   and ss_item_sk = i_item_sk
   and ss_store_sk = s_store_sk
-  and c_birth_country = upper(ca_country)
+  and c_current_addr_sk = ca_address_sk
+  and c_birth_country <> upper(ca_country)
   and s_zip = ca_zip
 and s_market_id=7
 group by c_last_name
@@ -138,8 +140,8 @@ STAGE PLANS:
                       Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
                       Spark HashTable Sink Operator
                         keys:
-                          0 _col2 (type: int)
-                          1 _col0 (type: int)
+                          0 _col2 (type: string)
+                          1 _col4 (type: string)
             Execution mode: vectorized
             Local Work:
               Map Reduce Local Work
@@ -147,43 +149,43 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 975), Map 20 (PARTITION-LEVEL SORT, 975)
-        Reducer 14 <- Map 21 (PARTITION-LEVEL SORT, 486), Reducer 13 (PARTITION-LEVEL SORT, 486)
-        Reducer 15 <- Map 22 (PARTITION-LEVEL SORT, 564), Reducer 14 (PARTITION-LEVEL SORT, 564)
-        Reducer 16 <- Map 23 (PARTITION-LEVEL SORT, 899), Reducer 15 (PARTITION-LEVEL SORT, 899)
-        Reducer 17 <- Reducer 16 (GROUP, 640)
+        Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 887), Map 20 (PARTITION-LEVEL SORT, 887)
+        Reducer 14 <- Map 21 (PARTITION-LEVEL SORT, 989), Reducer 13 (PARTITION-LEVEL SORT, 989)
+        Reducer 15 <- Map 22 (PARTITION-LEVEL SORT, 442), Reducer 14 (PARTITION-LEVEL SORT, 442)
+        Reducer 16 <- Map 23 (PARTITION-LEVEL SORT, 516), Reducer 15 (PARTITION-LEVEL SORT, 516)
+        Reducer 17 <- Reducer 16 (GROUP, 529)
         Reducer 18 <- Reducer 17 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 12 
             Map Operator Tree:
                 TableScan
-                  alias: store_sales
-                  filterExpr: (ss_ticket_number is not null and ss_item_sk is not null and ss_store_sk is not null and ss_customer_sk is not null) (type: boolean)
-                  Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+                  alias: customer_address
+                  filterExpr: (ca_address_sk is not null and ca_zip is not null) (type: boolean)
+                  Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean)
-                    Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+                    predicate: (ca_address_sk is not null and ca_zip is not null) (type: boolean)
+                    Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_sales_price (type: decimal(7,2))
-                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                      Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+                      expressions: ca_address_sk (type: int), ca_state (type: string), ca_zip (type: string), ca_country (type: string)
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
                       Map Join Operator
                         condition map:
                              Inner Join 0 to 1
                         keys:
-                          0 _col2 (type: int)
-                          1 _col0 (type: int)
-                        outputColumnNames: _col0, _col1, _col3, _col4, _col6, _col8, _col9
+                          0 _col2 (type: string)
+                          1 _col4 (type: string)
+                        outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col7
                         input vertices:
                           1 Map 19
-                        Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE
                         Reduce Output Operator
-                          key expressions: _col1 (type: int)
+                          key expressions: _col0 (type: int)
                           sort order: +
-                          Map-reduce partition columns: _col1 (type: int)
-                          Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
-                          value expressions: _col0 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col9 (type: string)
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: string), _col7 (type: string)
             Execution mode: vectorized
             Local Work:
               Map Reduce Local Work
@@ -191,25 +193,45 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: customer
-                  filterExpr: (c_customer_sk is not null and c_birth_country is not null) (type: boolean)
+                  filterExpr: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean)
                   Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (c_birth_country is not null and c_customer_sk is not null) (type: boolean)
+                    predicate: (c_current_addr_sk is not null and c_customer_sk is not null) (type: boolean)
                     Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: c_customer_sk (type: int), c_first_name (type: string), c_last_name (type: string), c_birth_country (type: string)
-                      outputColumnNames: _col0, _col1, _col2, _col3
+                      expressions: c_customer_sk (type: int), c_current_addr_sk (type: int), c_first_name (type: string), c_last_name (type: string), c_birth_country (type: string)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
                       Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
-                        key expressions: _col0 (type: int)
+                        key expressions: _col1 (type: int)
                         sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
+                        Map-reduce partition columns: _col1 (type: int)
                         Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string)
+                        value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string)
             Execution mode: vectorized
         Map 21 
             Map Operator Tree:
                 TableScan
+                  alias: store_sales
+                  filterExpr: (ss_ticket_number is not null and ss_item_sk is not null and ss_store_sk is not null and ss_customer_sk is not null) (type: boolean)
+                  Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean)
+                    Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_sales_price (type: decimal(7,2))
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                      Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col1 (type: int), _col2 (type: int)
+                        sort order: ++
+                        Map-reduce partition columns: _col1 (type: int), _col2 (type: int)
+                        Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: int), _col3 (type: int), _col4 (type: decimal(7,2))
+            Execution mode: vectorized
+        Map 22 
+            Map Operator Tree:
+                TableScan
                   alias: item
                   filterExpr: i_item_sk is not null (type: boolean)
                   Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
@@ -227,7 +249,7 @@ STAGE PLANS:
                         Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: decimal(7,2)), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int)
             Execution mode: vectorized
-        Map 22 
+        Map 23 
             Map Operator Tree:
                 TableScan
                   alias: store_returns
@@ -246,109 +268,92 @@ STAGE PLANS:
                         Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
                         Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized
-        Map 23 
-            Map Operator Tree:
-                TableScan
-                  alias: customer_address
-                  filterExpr: (upper(ca_country) is not null and ca_zip is not null) (type: boolean)
-                  Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: (ca_zip is not null and upper(ca_country) is not null) (type: boolean)
-                    Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: ca_state (type: string), ca_zip (type: string), ca_country (type: string)
-                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col1 (type: string), upper(_col2) (type: string)
-                        sort order: ++
-                        Map-reduce partition columns: _col1 (type: string), upper(_col2) (type: string)
-                        Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: string)
-            Execution mode: vectorized
         Reducer 13 
             Reduce Operator Tree:
               Join Operator
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 _col1 (type: int)
-                  1 _col0 (type: int)
-                outputColumnNames: _col0, _col3, _col4, _col6, _col8, _col9, _col11, _col12, _col13
-                Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: int)
-                  Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col9 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string)
+                  0 _col0 (type: int)
+                  1 _col1 (type: int)
+                outputColumnNames: _col1, _col3, _col4, _col5, _col7, _col9, _col11, _col12, _col13
+                Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: (_col13 <> upper(_col3)) (type: boolean)
+                  Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col9 (type: int), _col4 (type: int)
+                    sort order: ++
+                    Map-reduce partition columns: _col9 (type: int), _col4 (type: int)
+                    Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: string), _col5 (type: string), _col7 (type: string), _col11 (type: string), _col12 (type: string)
         Reducer 14 
             Reduce Operator Tree:
               Join Operator
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 _col0 (type: int)
-                  1 _col0 (type: int)
-                outputColumnNames: _col0, _col3, _col4, _col6, _col8, _col9, _col11, _col12, _col13, _col15, _col16, _col17, _col18, _col19
-                Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+                  0 _col9 (type: int), _col4 (type: int)
+                  1 _col1 (type: int), _col2 (type: int)
+                outputColumnNames: _col1, _col5, _col7, _col11, _col12, _col14, _col17, _col18
+                Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col0 (type: int), _col3 (type: int)
-                  sort order: ++
-                  Map-reduce partition columns: _col0 (type: int), _col3 (type: int)
-                  Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col9 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col15 (type: decimal(7,2)), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: int)
+                  key expressions: _col14 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col14 (type: int)
+                  Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: string), _col5 (type: string), _col7 (type: string), _col11 (type: string), _col12 (type: string), _col17 (type: int), _col18 (type: decimal(7,2))
         Reducer 15 
             Reduce Operator Tree:
               Join Operator
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 _col0 (type: int), _col3 (type: int)
-                  1 _col0 (type: int), _col1 (type: int)
-                outputColumnNames: _col4, _col6, _col8, _col9, _col11, _col12, _col13, _col15, _col16, _col17, _col18, _col19
-                Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE
+                  0 _col14 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col1, _col5, _col7, _col11, _col12, _col14, _col17, _col18, _col20, _col21, _col22, _col23, _col24
+                Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col9 (type: string), _col13 (type: string)
+                  key expressions: _col14 (type: int), _col17 (type: int)
                   sort order: ++
-                  Map-reduce partition columns: _col9 (type: string), _col13 (type: string)
-                  Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col11 (type: string), _col12 (type: string), _col15 (type: decimal(7,2)), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: int)
+                  Map-reduce partition columns: _col14 (type: int), _col17 (type: int)
+                  Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: string), _col5 (type: string), _col7 (type: string), _col11 (type: string), _col12 (type: string), _col18 (type: decimal(7,2)), _col20 (type: decimal(7,2)), _col21 (type: string), _col22 (type: string), _col23 (type: string), _col24 (type: int)
         Reducer 16 
             Reduce Operator Tree:
               Join Operator
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 _col9 (type: string), _col13 (type: string)
-                  1 _col1 (type: string), upper(_col2) (type: string)
-                outputColumnNames: _col4, _col6, _col8, _col11, _col12, _col15, _col16, _col17, _col18, _col19, _col22
-                Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE
+                  0 _col14 (type: int), _col17 (type: int)
+                  1 _col0 (type: int), _col1 (type: int)
+                outputColumnNames: _col1, _col5, _col7, _col11, _col12, _col18, _col20, _col21, _col22, _col23, _col24
+                Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
-                  aggregations: sum(_col4)
-                  keys: _col11 (type: string), _col12 (type: string), _col6 (type: string), _col8 (type: string), _col15 (type: decimal(7,2)), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: int), _col22 (type: string)
+                  aggregations: sum(_col18)
+                  keys: _col11 (type: string), _col12 (type: string), _col1 (type: string), _col5 (type: string), _col7 (type: string), _col20 (type: decimal(7,2)), _col21 (type: string), _col22 (type: string), _col23 (type: string), _col24 (type: int)
                   mode: hash
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
-                  Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
-                    key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2)), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: string)
+                    key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: decimal(7,2)), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int)
                     sort order: ++++++++++
-                    Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2)), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: string)
-                    Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE
+                    Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: decimal(7,2)), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int)
+                    Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col10 (type: decimal(17,2))
         Reducer 17 
             Execution mode: vectorized
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0)
-                keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: decimal(7,2)), KEY._col5 (type: string), KEY._col6 (type: string), KEY._col7 (type: string), KEY._col8 (type: int), KEY._col9 (type: string)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: decimal(7,2)), KEY._col6 (type: string), KEY._col7 (type: string), KEY._col8 (type: string), KEY._col9 (type: int)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
-                Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col10 (type: decimal(17,2))
                   outputColumnNames: _col10
-                  Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
                   Group By Operator
                     aggregations: sum(_col10), count(_col10)
                     mode: hash
@@ -381,7 +386,7 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 8 
+        Map 9 
             Map Operator Tree:
                 TableScan
                   alias: store
@@ -396,8 +401,8 @@ STAGE PLANS:
                       Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
                       Spark HashTable Sink Operator
                         keys:
-                          0 _col2 (type: int)
-                          1 _col0 (type: int)
+                          0 _col2 (type: string)
+                          1 _col4 (type: string)
             Execution mode: vectorized
             Local Work:
               Map Reduce Local Work
@@ -405,11 +410,11 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 400), Map 7 (PARTITION-LEVEL SORT, 400)
-        Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 1009), Reducer 2 (PARTITION-LEVEL SORT, 1009)
-        Reducer 4 <- Map 10 (PARTITION-LEVEL SORT, 564), Reducer 3 (PARTITION-LEVEL SORT, 564)
-        Reducer 5 <- Map 11 (PARTITION-LEVEL SORT, 899), Reducer 4 (PARTITION-LEVEL SORT, 899)
-        Reducer 6 <- Reducer 5 (GROUP PARTITION-LEVEL SORT, 640)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 400), Map 6 (PARTITION-LEVEL SORT, 400)
+        Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1009), Reducer 8 (PARTITION-LEVEL SORT, 1009)
+        Reducer 4 <- Map 11 (PARTITION-LEVEL SORT, 516), Reducer 3 (PARTITION-LEVEL SORT, 516)
+        Reducer 5 <- Reducer 4 (GROUP PARTITION-LEVEL SORT, 529)
+        Reducer 8 <- Map 10 (PARTITION-LEVEL SORT, 887), Map 7 (PARTITION-LEVEL SORT, 887)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -435,6 +440,26 @@ STAGE PLANS:
         Map 10 
             Map Operator Tree:
                 TableScan
+                  alias: customer
+                  filterExpr: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean)
+                  Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (c_current_addr_sk is not null and c_customer_sk is not null) (type: boolean)
+                    Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: c_customer_sk (type: int), c_current_addr_sk (type: int), c_first_name (type: string), c_last_name (type: string), c_birth_country (type: string)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                      Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col1 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col1 (type: int)
+                        Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string)
+            Execution mode: vectorized
+        Map 11 
+            Map Operator Tree:
+                TableScan
                   alias: store_returns
                   filterExpr: (sr_ticket_number is not null and sr_item_sk is not null) (type: boolean)
                   Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
@@ -451,27 +476,7 @@ STAGE PLANS:
                         Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
                         Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized
-        Map 11 
-            Map Operator Tree:
-                TableScan
-                  alias: customer_address
-                  filterExpr: (upper(ca_country) is not null and ca_zip is not null) (type: boolean)
-                  Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: (ca_zip is not null and upper(ca_country) is not null) (type: boolean)
-                    Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: ca_state (type: string), ca_zip (type: string), ca_country (type: string)
-                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col1 (type: string), upper(_col2) (type: string)
-                        sort order: ++
-                        Map-reduce partition columns: _col1 (type: string), upper(_col2) (type: string)
-                        Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: string)
-            Execution mode: vectorized
-        Map 7 
+        Map 6 
             Map Operator Tree:
                 TableScan
                   alias: item
@@ -491,29 +496,39 @@ STAGE PLANS:
                         Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: decimal(7,2)), _col2 (type: string), _col4 (type: string), _col5 (type: int)
             Execution mode: vectorized
-        Map 9 
+        Map 7 
             Map Operator Tree:
                 TableScan
-                  alias: customer
-                  filterExpr: (c_customer_sk is not null and c_birth_country is not null) (type: boolean)
-                  Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+                  alias: customer_address
+                  filterExpr: (ca_address_sk is not null and ca_zip is not null) (type: boolean)
+                  Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (c_birth_country is not null and c_customer_sk is not null) (type: boolean)
-                    Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+                    predicate: (ca_address_sk is not null and ca_zip is not null) (type: boolean)
+                    Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: c_customer_sk (type: int), c_first_name (type: string), c_last_name (type: string), c_birth_country (type: string)
+                      expressions: ca_address_sk (type: int), ca_state (type: string), ca_zip (type: string), ca_country (type: string)
                       outputColumnNames: _col0, _col1, _col2, _col3
-                      Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string)
+                      Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col2 (type: string)
+                          1 _col4 (type: string)
+                        outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col7
+                        input vertices:
+                          1 Map 9
+                        Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: string), _col7 (type: string)
             Execution mode: vectorized
-        Reducer 2 
             Local Work:
               Map Reduce Local Work
+        Reducer 2 
             Reduce Operator Tree:
               Join Operator
                 condition map:
@@ -523,38 +538,28 @@ STAGE PLANS:
                   1 _col0 (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6, _col7, _col9, _col10
                 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
-                Map Join Operator
-                  condition map:
-                       Inner Join 0 to 1
-                  keys:
-                    0 _col2 (type: int)
-                    1 _col0 (type: int)
-                  outputColumnNames: _col0, _col1, _col3, _col4, _col6, _col7, _col9, _col10, _col12, _col14, _col15
-                  input vertices:
-                    1 Map 8
-                  Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col1 (type: int)
-                    sort order: +
-                    Map-reduce partition columns: _col1 (type: int)
-                    Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col0 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int), _col12 (type: string), _col14 (type: string), _col15 (type: string)
+                Reduce Output Operator
+                  key expressions: _col1 (type: int), _col2 (type: int)
+                  sort order: ++
+                  Map-reduce partition columns: _col1 (type: int), _col2 (type: int)
+                  Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int)
         Reducer 3 
             Reduce Operator Tree:
               Join Operator
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 _col1 (type: int)
-                  1 _col0 (type: int)
-                outputColumnNames: _col0, _col3, _col4, _col6, _col7, _col9, _col10, _col12, _col14, _col15, _col17, _col18, _col19
-                Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+                  0 _col1 (type: int), _col2 (type: int)
+                  1 _col0 (type: int), _col9 (type: int)
+                outputColumnNames: _col0, _col3, _col4, _col6, _col7, _col9, _col10, _col13, _col14, _col17, _col21, _col23
+                Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: int), _col3 (type: int)
                   sort order: ++
                   Map-reduce partition columns: _col0 (type: int), _col3 (type: int)
-                  Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col4 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int), _col12 (type: string), _col14 (type: string), _col15 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: string)
+                  Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col4 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int), _col13 (type: string), _col14 (type: string), _col17 (type: string), _col21 (type: string), _col23 (type: string)
         Reducer 4 
             Reduce Operator Tree:
               Join Operator
@@ -563,61 +568,45 @@ STAGE PLANS:
                 keys:
                   0 _col0 (type: int), _col3 (type: int)
                   1 _col0 (type: int), _col1 (type: int)
-                outputColumnNames: _col4, _col6, _col7, _col9, _col10, _col12, _col14, _col15, _col17, _col18, _col19
-                Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col15 (type: string), _col19 (type: string)
-                  sort order: ++
-                  Map-reduce partition columns: _col15 (type: string), _col19 (type: string)
-                  Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col4 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int), _col12 (type: string), _col14 (type: string), _col17 (type: string), _col18 (type: string)
-        Reducer 5 
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col15 (type: string), _col19 (type: string)
-                  1 _col1 (type: string), upper(_col2) (type: string)
-                outputColumnNames: _col4, _col6, _col7, _col9, _col10, _col12, _col14, _col17, _col18, _col22
-                Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE
+                outputColumnNames: _col4, _col6, _col7, _col9, _col10, _col13, _col14, _col17, _col21, _col23
+                Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
                   aggregations: sum(_col4)
-                  keys: _col17 (type: string), _col18 (type: string), _col12 (type: string), _col22 (type: string), _col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int), _col14 (type: string)
+                  keys: _col13 (type: string), _col14 (type: string), _col21 (type: string), _col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int), _col17 (type: string), _col23 (type: string)
                   mode: hash
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
-                  Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
-                    key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2)), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: string)
+                    key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: decimal(7,2)), _col4 (type: string), _col5 (type: string), _col6 (type: int), _col7 (type: string), _col8 (type: string)
                     sort order: +++++++++
                     Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
-                    Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col9 (type: decimal(17,2))
-        Reducer 6 
+        Reducer 5 
             Execution mode: vectorized
             Local Work:
               Map Reduce Local Work
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0)
-                keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: decimal(7,2)), KEY._col5 (type: string), KEY._col6 (type: string), KEY._col7 (type: int), KEY._col8 (type: string)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: decimal(7,2)), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: int), KEY._col7 (type: string), KEY._col8 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
-                Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col9 (type: decimal(17,2))
-                  outputColumnNames: _col1, _col2, _col7, _col9
-                  Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE
+                  outputColumnNames: _col4, _col5, _col7, _col9
+                  Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
                   Group By Operator
                     aggregations: sum(_col9)
-                    keys: _col1 (type: string), _col2 (type: string), _col7 (type: string)
+                    keys: _col4 (type: string), _col5 (type: string), _col7 (type: string)
                     mode: complete
                     outputColumnNames: _col0, _col1, _col2, _col3
-                    Statistics: Num rows: 231911707 Data size: 20459318134 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string), _col3 (type: decimal(27,2))
                       outputColumnNames: _col0, _col1, _col2, _col3
-                      Statistics: Num rows: 231911707 Data size: 20459318134 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE
                       Map Join Operator
                         condition map:
                              Inner Join 0 to 1
@@ -627,21 +616,44 @@ STAGE PLANS:
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4
                         input vertices:
                           1 Reducer 18
-                        Statistics: Num rows: 231911707 Data size: 74494745865 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 191662559 Data size: 61565902849 Basic stats: COMPLETE Column stats: NONE
                         Filter Operator
                           predicate: (_col3 > _col4) (type: boolean)
-                          Statistics: Num rows: 77303902 Data size: 24831581847 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 63887519 Data size: 20521967402 Basic stats: COMPLETE Column stats: NONE
                           Select Operator
                             expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: decimal(27,2))
                             outputColumnNames: _col0, _col1, _col2, _col3
-                            Statistics: Num rows: 77303902 Data size: 24831581847 Basic stats: COMPLETE Column stats: NONE
+                            Statistics: Num rows: 63887519 Data size: 20521967402 Basic stats: COMPLETE Column stats: NONE
                             File Output Operator
                               compressed: false
-                              Statistics: Num rows: 77303902 Data size: 24831581847 Basic stats: COMPLETE Column stats: NONE
+                              Statistics: Num rows: 63887519 Data size: 20521967402 Basic stats: COMPLETE Column stats: NONE
                               table:
                                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 8 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col1 (type: int)
+                outputColumnNames: _col1, _col3, _col4, _col5, _col7, _col9, _col11, _col12, _col13
+                Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: (_col13 <> upper(_col3)) (type: boolean)
+                  Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col9 (type: int), _col11 (type: string), _col12 (type: string), _col1 (type: string), _col4 (type: int), _col5 (type: string), _col7 (type: string)
+                    outputColumnNames: _col0, _col2, _col3, _col6, _col9, _col10, _col12
+                    Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int), _col9 (type: int)
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: int), _col9 (type: int)
+                      Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col2 (type: string), _col3 (type: string), _col6 (type: string), _col10 (type: string), _col12 (type: string)
 
   Stage: Stage-0
     Fetch Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/3cbc13e9/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out
index baf790e..ace7cf5 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out
@@ -1,7 +1,7 @@
-Warning: Shuffle Join MERGEJOIN[589][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 29' is a cross product
-Warning: Shuffle Join MERGEJOIN[590][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 30' is a cross product
-Warning: Shuffle Join MERGEJOIN[592][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 33' is a cross product
-Warning: Shuffle Join MERGEJOIN[593][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 34' is a cross product
+Warning: Shuffle Join MERGEJOIN[593][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 29' is a cross product
+Warning: Shuffle Join MERGEJOIN[594][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 30' is a cross product
+Warning: Shuffle Join MERGEJOIN[596][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 33' is a cross product
+Warning: Shuffle Join MERGEJOIN[597][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 34' is a cross product
 PREHOOK: query: explain cbo
 with frequent_ss_items as 
  (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt

http://git-wip-us.apache.org/repos/asf/hive/blob/3cbc13e9/ql/src/test/results/clientpositive/perf/tez/cbo_query24.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query24.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query24.q.out
index 53220d2..1d005b8 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query24.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query24.q.out
@@ -1,4 +1,4 @@
-Warning: Shuffle Join MERGEJOIN[290][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 8' is a cross product
+Warning: Shuffle Join MERGEJOIN[301][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product
 PREHOOK: query: explain cbo
 with ssales as
 (select c_last_name
@@ -23,7 +23,8 @@ where ss_ticket_number = sr_ticket_number
   and ss_customer_sk = c_customer_sk
   and ss_item_sk = i_item_sk
   and ss_store_sk = s_store_sk
-  and c_birth_country = upper(ca_country)
+  and c_current_addr_sk = ca_address_sk
+  and c_birth_country <> upper(ca_country)
   and s_zip = ca_zip
 and s_market_id=7
 group by c_last_name
@@ -79,7 +80,8 @@ where ss_ticket_number = sr_ticket_number
   and ss_customer_sk = c_customer_sk
   and ss_item_sk = i_item_sk
   and ss_store_sk = s_store_sk
-  and c_birth_country = upper(ca_country)
+  and c_current_addr_sk = ca_address_sk
+  and c_birth_country <> upper(ca_country)
   and s_zip = ca_zip
 and s_market_id=7
 group by c_last_name
@@ -115,57 +117,58 @@ CBO PLAN:
 HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3])
   HiveJoin(condition=[>($3, $4)], joinType=[inner], algorithm=[none], cost=[not available])
     HiveProject(c_last_name=[$1], c_first_name=[$0], s_store_name=[$2], $f3=[$3])
-      HiveAggregate(group=[{1, 2, 7}], agg#0=[sum($9)])
-        HiveProject(ca_state=[$0], c_first_name=[$1], c_last_name=[$2], i_current_price=[$3], i_size=[$4], i_units=[$5], i_manager_id=[$6], s_store_name=[$7], s_state=[$8], $f9=[$9])
-          HiveAggregate(group=[{0, 6, 7, 15, 16, 18, 19, 21, 23}], agg#0=[sum($13)])
-            HiveJoin(condition=[AND(=($8, UPPER($2)), =($24, $1))], joinType=[inner], algorithm=[none], cost=[not available])
-              HiveProject(ca_state=[$8], ca_zip=[$9], ca_country=[$10])
-                HiveFilter(condition=[AND(IS NOT NULL(UPPER($10)), IS NOT NULL($9))])
-                  HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address])
-              HiveJoin(condition=[AND(=($9, $1), =($6, $0))], joinType=[inner], algorithm=[none], cost=[not available])
-                HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9])
-                  HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($2))])
-                    HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns])
-                HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available])
-                  HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14])
-                    HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($14))])
-                      HiveTableScan(table=[[default, customer]], table:alias=[customer])
-                  HiveJoin(condition=[=($2, $11)], joinType=[inner], algorithm=[none], cost=[not available])
-                    HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available])
-                      HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_sales_price=[$13])
-                        HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($2), IS NOT NULL($7), IS NOT NULL($3))])
-                          HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales])
-                      HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_color=[CAST(_UTF-16LE'orchid'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], i_units=[$18], i_manager_id=[$20])
-                        HiveFilter(condition=[AND(=($17, _UTF-16LE'orchid'), IS NOT NULL($0))])
-                          HiveTableScan(table=[[default, item]], table:alias=[item])
-                    HiveProject(s_store_sk=[$0], s_store_name=[$5], s_market_id=[CAST(7):INTEGER], s_state=[$24], s_zip=[$25])
-                      HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($0), IS NOT NULL($25))])
-                        HiveTableScan(table=[[default, store]], table:alias=[store])
+      HiveAggregate(group=[{4, 5, 7}], agg#0=[sum($9)])
+        HiveProject(i_current_price=[$0], i_size=[$1], i_units=[$2], i_manager_id=[$3], c_first_name=[$4], c_last_name=[$5], ca_state=[$6], s_store_name=[$7], s_state=[$8], $f9=[$9])
+          HiveAggregate(group=[{8, 9, 11, 12, 15, 16, 19, 23, 25}], agg#0=[sum($6)])
+            HiveJoin(condition=[AND(=($5, $1), =($2, $0))], joinType=[inner], algorithm=[none], cost=[not available])
+              HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9])
+                HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($2))])
+                  HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns])
+              HiveJoin(condition=[AND(=($1, $11), =($2, $20))], joinType=[inner], algorithm=[none], cost=[not available])
+                HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available])
+                  HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_sales_price=[$13])
+                    HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($2), IS NOT NULL($7), IS NOT NULL($3))])
+                      HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales])
+                  HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_color=[CAST(_UTF-16LE'orchid'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], i_units=[$18], i_manager_id=[$20])
+                    HiveFilter(condition=[AND(=($17, _UTF-16LE'orchid'), IS NOT NULL($0))])
+                      HiveTableScan(table=[[default, item]], table:alias=[item])
+                HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$1], c_first_name=[$2], c_last_name=[$3], c_birth_country=[$4], ca_address_sk=[$5], ca_state=[$6], ca_zip=[$7], ca_country=[$8], s_store_sk=[$9], s_store_name=[$10], s_market_id=[$11], s_state=[$12], s_zip=[$13])
+                  HiveJoin(condition=[AND(=($1, $5), <>($4, UPPER($8)))], joinType=[inner], algorithm=[none], cost=[not available])
+                    HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14])
+                      HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))])
+                        HiveTableScan(table=[[default, customer]], table:alias=[customer])
+                    HiveJoin(condition=[=($8, $2)], joinType=[inner], algorithm=[none], cost=[not available])
+                      HiveProject(ca_address_sk=[$0], ca_state=[$8], ca_zip=[$9], ca_country=[$10])
+                        HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($9))])
+                          HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address])
+                      HiveProject(s_store_sk=[$0], s_store_name=[$5], s_market_id=[CAST(7):INTEGER], s_state=[$24], s_zip=[$25])
+                        HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($0), IS NOT NULL($25))])
+                          HiveTableScan(table=[[default, store]], table:alias=[store])
     HiveProject(_o__c0=[*(0.05, /($0, $1))])
       HiveAggregate(group=[{}], agg#0=[sum($10)], agg#1=[count($10)])
-        HiveProject(c_first_name=[$0], c_last_name=[$1], s_store_name=[$2], s_state=[$3], i_current_price=[$4], i_size=[$5], i_color=[$6], i_units=[$7], i_manager_id=[$8], ca_state=[$9], $f10=[$10])
-          HiveAggregate(group=[{3, 4, 12, 14, 17, 18, 19, 20, 21, 22}], agg#0=[sum($10)])
-            HiveJoin(condition=[AND(=($5, UPPER($24)), =($15, $23))], joinType=[inner], algorithm=[none], cost=[not available])
-              HiveJoin(condition=[AND(=($9, $1), =($6, $0))], joinType=[inner], algorithm=[none], cost=[not available])
-                HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9])
-                  HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($2))])
-                    HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns])
-                HiveJoin(condition=[=($4, $14)], joinType=[inner], algorithm=[none], cost=[not available])
-                  HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available])
-                    HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14])
-                      HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($14))])
+        HiveProject(c_first_name=[$0], c_last_name=[$1], ca_state=[$2], s_store_name=[$3], s_state=[$4], i_current_price=[$5], i_size=[$6], i_color=[$7], i_units=[$8], i_manager_id=[$9], $f10=[$10])
+          HiveAggregate(group=[{7, 8, 11, 15, 17, 20, 21, 22, 23, 24}], agg#0=[sum($4)])
+            HiveJoin(condition=[AND(=($3, $26), =($0, $25))], joinType=[inner], algorithm=[none], cost=[not available])
+              HiveJoin(condition=[=($0, $19)], joinType=[inner], algorithm=[none], cost=[not available])
+                HiveJoin(condition=[AND(=($1, $5), =($2, $14))], joinType=[inner], algorithm=[none], cost=[not available])
+                  HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_sales_price=[$13])
+                    HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($2), IS NOT NULL($7), IS NOT NULL($3))])
+                      HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales])
+                  HiveJoin(condition=[AND(=($1, $5), <>($4, UPPER($8)))], joinType=[inner], algorithm=[none], cost=[not available])
+                    HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14])
+                      HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))])
                         HiveTableScan(table=[[default, customer]], table:alias=[customer])
-                    HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available])
-                      HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_sales_price=[$13])
-                        HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($2), IS NOT NULL($7), IS NOT NULL($3))])
-                          HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales])
+                    HiveJoin(condition=[=($8, $2)], joinType=[inner], algorithm=[none], cost=[not available])
+                      HiveProject(ca_address_sk=[$0], ca_state=[$8], ca_zip=[$9], ca_country=[$10])
+                        HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($9))])
+                          HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address])
                       HiveProject(s_store_sk=[$0], s_store_name=[$5], s_market_id=[CAST(7):INTEGER], s_state=[$24], s_zip=[$25])
                         HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($0), IS NOT NULL($25))])
                           HiveTableScan(table=[[default, store]], table:alias=[store])
-                  HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_color=[$17], i_units=[$18], i_manager_id=[$20])
-                    HiveFilter(condition=[IS NOT NULL($0)])
-                      HiveTableScan(table=[[default, item]], table:alias=[item])
-              HiveProject(ca_state=[$8], ca_zip=[$9], ca_country=[$10])
-                HiveFilter(condition=[AND(IS NOT NULL(UPPER($10)), IS NOT NULL($9))])
-                  HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address])
+                HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_color=[$17], i_units=[$18], i_manager_id=[$20])
+                  HiveFilter(condition=[IS NOT NULL($0)])
+                    HiveTableScan(table=[[default, item]], table:alias=[item])
+              HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9])
+                HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($2))])
+                  HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns])
 

http://git-wip-us.apache.org/repos/asf/hive/blob/3cbc13e9/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out
index 34cc51b..0801f34 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out
@@ -1,4 +1,4 @@
-Warning: Shuffle Join MERGEJOIN[287][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 8' is a cross product
+Warning: Shuffle Join MERGEJOIN[298][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product
 PREHOOK: query: explain cbo
 with ssales as
 (select c_last_name
@@ -23,7 +23,8 @@ where ss_ticket_number = sr_ticket_number
   and ss_customer_sk = c_customer_sk
   and ss_item_sk = i_item_sk
   and ss_store_sk = s_store_sk
-  and c_birth_country = upper(ca_country)
+  and c_current_addr_sk = ca_address_sk
+  and c_birth_country <> upper(ca_country)
   and s_zip = ca_zip
 and s_market_id=7
 group by c_last_name
@@ -79,7 +80,8 @@ where ss_ticket_number = sr_ticket_number
   and ss_customer_sk = c_customer_sk
   and ss_item_sk = i_item_sk
   and ss_store_sk = s_store_sk
-  and c_birth_country = upper(ca_country)
+  and c_current_addr_sk = ca_address_sk
+  and c_birth_country <> upper(ca_country)
   and s_zip = ca_zip
 and s_market_id=7
 group by c_last_name
@@ -115,54 +117,55 @@ CBO PLAN:
 HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3])
   HiveJoin(condition=[>($3, $4)], joinType=[inner], algorithm=[none], cost=[not available])
     HiveProject(c_last_name=[$1], c_first_name=[$0], s_store_name=[$2], $f3=[$3])
-      HiveAggregate(group=[{1, 2, 7}], agg#0=[sum($9)])
-        HiveProject(ca_state=[$0], c_first_name=[$1], c_last_name=[$2], i_current_price=[$3], i_size=[$4], i_units=[$5], i_manager_id=[$6], s_store_name=[$7], s_state=[$8], $f9=[$9])
-          HiveAggregate(group=[{0, 6, 7, 15, 16, 17, 18, 20, 21}], agg#0=[sum($13)])
-            HiveJoin(condition=[AND(=($8, $2), =($22, $1))], joinType=[inner], algorithm=[none], cost=[not available])
-              HiveProject(ca_state=[$8], ca_zip=[$9], UPPER=[UPPER($10)])
-                HiveFilter(condition=[AND(IS NOT NULL(UPPER($10)), IS NOT NULL($9))])
-                  HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address])
-              HiveJoin(condition=[AND(=($9, $1), =($6, $0))], joinType=[inner], algorithm=[none], cost=[not available])
-                HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9])
-                  HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns])
-                HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available])
-                  HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14])
-                    HiveFilter(condition=[IS NOT NULL($14)])
-                      HiveTableScan(table=[[default, customer]], table:alias=[customer])
-                  HiveJoin(condition=[=($2, $10)], joinType=[inner], algorithm=[none], cost=[not available])
-                    HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available])
-                      HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_sales_price=[$13])
-                        HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($3))])
-                          HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales])
-                      HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_units=[$18], i_manager_id=[$20])
-                        HiveFilter(condition=[=($17, _UTF-16LE'orchid')])
-                          HiveTableScan(table=[[default, item]], table:alias=[item])
-                    HiveProject(s_store_sk=[$0], s_store_name=[$5], s_state=[$24], s_zip=[$25])
-                      HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($25))])
-                        HiveTableScan(table=[[default, store]], table:alias=[store])
-    HiveProject(_o__c0=[*(0.05, /($0, $1))])
-      HiveAggregate(group=[{}], agg#0=[sum($10)], agg#1=[count($10)])
-        HiveProject(c_first_name=[$0], c_last_name=[$1], s_store_name=[$2], s_state=[$3], i_current_price=[$4], i_size=[$5], i_color=[$6], i_units=[$7], i_manager_id=[$8], ca_state=[$9], $f10=[$10])
-          HiveAggregate(group=[{3, 4, 12, 13, 16, 17, 18, 19, 20, 21}], agg#0=[sum($10)])
-            HiveJoin(condition=[AND(=($5, $23), =($14, $22))], joinType=[inner], algorithm=[none], cost=[not available])
-              HiveJoin(condition=[AND(=($9, $1), =($6, $0))], joinType=[inner], algorithm=[none], cost=[not available])
-                HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9])
-                  HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns])
-                HiveJoin(condition=[=($4, $13)], joinType=[inner], algorithm=[none], cost=[not available])
-                  HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available])
-                    HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14])
-                      HiveFilter(condition=[IS NOT NULL($14)])
+      HiveAggregate(group=[{4, 5, 7}], agg#0=[sum($9)])
+        HiveProject(i_current_price=[$0], i_size=[$1], i_units=[$2], i_manager_id=[$3], c_first_name=[$4], c_last_name=[$5], ca_state=[$6], s_store_name=[$7], s_state=[$8], $f9=[$9])
+          HiveAggregate(group=[{8, 9, 10, 11, 14, 15, 18, 22, 23}], agg#0=[sum($6)])
+            HiveJoin(condition=[AND(=($5, $1), =($2, $0))], joinType=[inner], algorithm=[none], cost=[not available])
+              HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9])
+                HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns])
+              HiveJoin(condition=[AND(=($1, $10), =($2, $19))], joinType=[inner], algorithm=[none], cost=[not available])
+                HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available])
+                  HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_sales_price=[$13])
+                    HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($3))])
+                      HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales])
+                  HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_units=[$18], i_manager_id=[$20])
+                    HiveFilter(condition=[=($17, _UTF-16LE'orchid')])
+                      HiveTableScan(table=[[default, item]], table:alias=[item])
+                HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$1], c_first_name=[$2], c_last_name=[$3], c_birth_country=[$4], ca_address_sk=[$5], ca_state=[$6], ca_zip=[$7], UPPER=[$8], s_store_sk=[$9], s_store_name=[$10], s_state=[$11], s_zip=[$12])
+                  HiveJoin(condition=[AND(=($1, $5), <>($4, $8))], joinType=[inner], algorithm=[none], cost=[not available])
+                    HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14])
+                      HiveFilter(condition=[IS NOT NULL($4)])
                         HiveTableScan(table=[[default, customer]], table:alias=[customer])
-                    HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available])
-                      HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_sales_price=[$13])
-                        HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($3))])
-                          HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales])
+                    HiveJoin(condition=[=($7, $2)], joinType=[inner], algorithm=[none], cost=[not available])
+                      HiveProject(ca_address_sk=[$0], ca_state=[$8], ca_zip=[$9], UPPER=[UPPER($10)])
+                        HiveFilter(condition=[IS NOT NULL($9)])
+                          HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address])
                       HiveProject(s_store_sk=[$0], s_store_name=[$5], s_state=[$24], s_zip=[$25])
                         HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($25))])
                           HiveTableScan(table=[[default, store]], table:alias=[store])
-                  HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_color=[$17], i_units=[$18], i_manager_id=[$20])
-                    HiveTableScan(table=[[default, item]], table:alias=[item])
-              HiveProject(ca_state=[$8], ca_zip=[$9], UPPER=[UPPER($10)])
-                HiveFilter(condition=[AND(IS NOT NULL(UPPER($10)), IS NOT NULL($9))])
-                  HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address])
+    HiveProject(_o__c0=[*(0.05, /($0, $1))])
+      HiveAggregate(group=[{}], agg#0=[sum($10)], agg#1=[count($10)])
+        HiveProject(c_first_name=[$0], c_last_name=[$1], ca_state=[$2], s_store_name=[$3], s_state=[$4], i_current_price=[$5], i_size=[$6], i_color=[$7], i_units=[$8], i_manager_id=[$9], $f10=[$10])
+          HiveAggregate(group=[{9, 10, 13, 17, 18, 21, 22, 23, 24, 25}], agg#0=[sum($6)])
+            HiveJoin(condition=[AND(=($5, $1), =($2, $0))], joinType=[inner], algorithm=[none], cost=[not available])
+              HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9])
+                HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns])
+              HiveJoin(condition=[=($0, $18)], joinType=[inner], algorithm=[none], cost=[not available])
+                HiveJoin(condition=[AND(=($17, $12), =($2, $14))], joinType=[inner], algorithm=[none], cost=[not available])
+                  HiveJoin(condition=[=($1, $5)], joinType=[inner], algorithm=[none], cost=[not available])
+                    HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_sales_price=[$13])
+                      HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($3))])
+                        HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales])
+                    HiveJoin(condition=[AND(=($1, $5), <>($4, $8))], joinType=[inner], algorithm=[none], cost=[not available])
+                      HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14])
+                        HiveFilter(condition=[IS NOT NULL($4)])
+                          HiveTableScan(table=[[default, customer]], table:alias=[customer])
+                      HiveProject(ca_address_sk=[$0], ca_state=[$8], ca_zip=[$9], UPPER=[UPPER($10)])
+                        HiveFilter(condition=[IS NOT NULL($9)])
+                          HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address])
+                  HiveProject(s_store_sk=[$0], s_store_name=[$5], s_state=[$24], s_zip=[$25])
+                    HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($25))])
+                      HiveTableScan(table=[[default, store]], table:alias=[store])
+                HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_color=[$17], i_units=[$18], i_manager_id=[$20])
+                  HiveTableScan(table=[[default, item]], table:alias=[item])
 

http://git-wip-us.apache.org/repos/asf/hive/blob/3cbc13e9/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query6.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query6.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query6.q.out
index ef53060..cbf372a 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query6.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query6.q.out
@@ -1,4 +1,4 @@
-Warning: Map Join MAPJOIN[172][bigTable=?] in task 'Reducer 15' is a cross product
+Warning: Map Join MAPJOIN[170][bigTable=?] in task 'Reducer 15' is a cross product
 PREHOOK: query: explain cbo
 select  a.ca_state state, count(*) cnt
  from customer_address a