You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by za...@apache.org on 2022/06/10 13:08:39 UTC

[hive] branch master updated: HIVE-26238: Decouple sort filter predicates optimization from digest normalization in CBO (Stamatis Zampetakis, reviewed by Zoltan Haindrich)

This is an automated email from the ASF dual-hosted git repository.

zabetak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new f29cb2245c9 HIVE-26238: Decouple sort filter predicates optimization from digest normalization in CBO (Stamatis Zampetakis, reviewed by Zoltan Haindrich)
f29cb2245c9 is described below

commit f29cb2245c97102975ea0dd73783049eaa0947a0
Author: Stamatis Zampetakis <za...@gmail.com>
AuthorDate: Tue May 17 15:20:06 2022 +0200

    HIVE-26238: Decouple sort filter predicates optimization from digest normalization in CBO (Stamatis Zampetakis, reviewed by Zoltan Haindrich)
    
    1. Decouple sort filter optimization from digest normalization by
    refactoring HiveFilterSortPredicates into a (DFS) visitor. We cannot
    use the planner or rules because they rely on the digest. Performing
    this optimization using a visitor also slightly simplifies the code:
    no registry is needed, since we are not going to visit the same
    node twice.
    
    2. Move the optimization after all post-join transformations to avoid
    having other optimizations cancel the benefit of the sort filter
    predicates.
    
    Closes #3299
---
 .../calcite/rules/HiveFilterSortPredicates.java    | 47 +++++++---------------
 .../hadoop/hive/ql/parse/CalcitePlanner.java       |  8 ++--
 .../clientpositive/llap/external_jdbc_table2.q.out |  2 +-
 .../perf/tpcds30tb/tez/cbo_ext_query1.q.out        |  4 +-
 .../perf/tpcds30tb/tez/cbo_query1.q.out            |  2 +-
 .../perf/tpcds30tb/tez/cbo_query11.q.out           |  8 ++--
 .../perf/tpcds30tb/tez/cbo_query31.q.out           |  2 +-
 .../perf/tpcds30tb/tez/cbo_query33.q.out           |  4 +-
 .../perf/tpcds30tb/tez/cbo_query34.q.out           |  2 +-
 .../perf/tpcds30tb/tez/cbo_query38.q.out           |  4 +-
 .../perf/tpcds30tb/tez/cbo_query4.q.out            | 12 +++---
 .../perf/tpcds30tb/tez/cbo_query54.q.out           |  2 +-
 .../perf/tpcds30tb/tez/cbo_query56.q.out           |  4 +-
 .../perf/tpcds30tb/tez/cbo_query6.q.out            |  2 +-
 .../perf/tpcds30tb/tez/cbo_query60.q.out           |  4 +-
 .../perf/tpcds30tb/tez/cbo_query65.q.out           |  2 +-
 .../perf/tpcds30tb/tez/cbo_query73.q.out           |  2 +-
 .../perf/tpcds30tb/tez/cbo_query78.q.out           |  2 +-
 .../perf/tpcds30tb/tez/cbo_query81.q.out           |  2 +-
 .../perf/tpcds30tb/tez/query11.q.out               |  4 +-
 .../clientpositive/perf/tpcds30tb/tez/query4.q.out |  6 +--
 21 files changed, 52 insertions(+), 73 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSortPredicates.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSortPredicates.java
index 780481f2fd5..6ecf94b5f63 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSortPredicates.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSortPredicates.java
@@ -20,8 +20,7 @@ import java.util.Comparator;
 import java.util.List;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.stream.Collectors;
-import org.apache.calcite.plan.RelOptRule;
-import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.rel.RelHomogeneousShuttle;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.core.Filter;
 import org.apache.calcite.rel.metadata.RelMetadataQuery;
@@ -42,49 +41,34 @@ import org.slf4j.LoggerFactory;
 
 
 /**
- * Rule that sorts conditions in a filter predicate to accelerate query processing
+ * Sorts conditions in a filter predicate to accelerate query processing
  * based on selectivity and compute cost. Currently it is not applied recursively,
  * i.e., it is only applied to top predicates in the condition.
  */
-public class HiveFilterSortPredicates extends RelOptRule {
+public class HiveFilterSortPredicates extends RelHomogeneousShuttle {
 
   private static final Logger LOG = LoggerFactory.getLogger(HiveFilterSortPredicates.class);
 
   private final AtomicInteger noColsMissingStats;
 
   public HiveFilterSortPredicates(AtomicInteger noColsMissingStats) {
-    super(
-        operand(Filter.class,
-            operand(RelNode.class, any())));
     this.noColsMissingStats = noColsMissingStats;
   }
 
   @Override
-  public boolean matches(RelOptRuleCall call) {
-    final Filter filter = call.rel(0);
-
-    HiveRulesRegistry registry = call.getPlanner().getContext().unwrap(HiveRulesRegistry.class);
-
-    // If this operator has been visited already by the rule,
-    // we do not need to apply the optimization
-    if (registry != null && registry.getVisited(this).contains(filter)) {
-      return false;
+  public RelNode visit(RelNode other) {
+    RelNode visitedNode = super.visit(other);
+    if (visitedNode instanceof Filter) {
+      return rewriteFilter((Filter) visitedNode);
     }
 
-    return true;
+    return visitedNode;
   }
 
-  @Override
-  public void onMatch(RelOptRuleCall call) {
+  private RelNode rewriteFilter(Filter matchNode) {
     try {
-      final Filter filter = call.rel(0);
-      final RelNode input = call.rel(1);
-
-      // Register that we have visited this operator in this rule
-      HiveRulesRegistry registry = call.getPlanner().getContext().unwrap(HiveRulesRegistry.class);
-      if (registry != null) {
-        registry.registerVisited(this, filter);
-      }
+      final Filter filter = matchNode;
+      final RelNode input = filter.getInput();
 
       final RexNode originalCond = filter.getCondition();
       final RexSortPredicatesShuttle sortPredicatesShuttle = new RexSortPredicatesShuttle(
@@ -92,16 +76,12 @@ public class HiveFilterSortPredicates extends RelOptRule {
       final RexNode newCond = originalCond.accept(sortPredicatesShuttle);
       if (!sortPredicatesShuttle.modified) {
         // We are done, bail out
-        return;
+        return matchNode;
       }
 
       // We register the new filter so we do not fire the rule on it again
       final Filter newFilter = filter.copy(filter.getTraitSet(), input, newCond);
-      if (registry != null) {
-        registry.registerVisited(this, newFilter);
-      }
-
-      call.transformTo(newFilter);
+      return newFilter;
     }
     catch (Exception e) {
       if (noColsMissingStats.get() > 0) {
@@ -111,6 +91,7 @@ public class HiveFilterSortPredicates extends RelOptRule {
         throw e;
       }
     }
+    return matchNode;
   }
 
   /**
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 47fd2048781..765e2e46463 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -1757,6 +1757,9 @@ public class CalcitePlanner extends SemanticAnalyzer {
 
       // 5. Apply post-join order optimizations
       calcitePlan = applyPostJoinOrderingTransform(calcitePlan, mdProvider.getMetadataProvider(), executorProvider);
+      if (conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_SORT_PREDS_WITH_STATS)) {
+        calcitePlan = calcitePlan.accept(new HiveFilterSortPredicates(noColsMissingStats));
+      }
       if (LOG.isDebugEnabled()) {
         LOG.debug("Plan after post-join transformations:\n" + RelOptUtil.toString(calcitePlan));
       }
@@ -2265,11 +2268,6 @@ public class CalcitePlanner extends SemanticAnalyzer {
         generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
             HiveWindowingLastValueRewrite.INSTANCE);
       }
-      // 6. Sort predicates in filter expressions
-      if (conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_SORT_PREDS_WITH_STATS)) {
-        generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
-            new HiveFilterSortPredicates(noColsMissingStats));
-      }
 
       // 7. Apply Druid transformation rules
       generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
diff --git a/ql/src/test/results/clientpositive/llap/external_jdbc_table2.q.out b/ql/src/test/results/clientpositive/llap/external_jdbc_table2.q.out
index c84efa0c6d5..1802545b44b 100644
--- a/ql/src/test/results/clientpositive/llap/external_jdbc_table2.q.out
+++ b/ql/src/test/results/clientpositive/llap/external_jdbc_table2.q.out
@@ -715,7 +715,7 @@ STAGE PLANS:
           properties:
             hive.sql.query SELECT "ikey"
 FROM "EXTERNAL_JDBC_SIMPLE_DERBY2_TABLE1"
-WHERE "bkey" IN (10, 20) AND "dkey" IN (15.15, 25.25) AND (("bkey" = 10 AND "dkey" = 15.15 OR "bkey" = 20 AND "dkey" = 25.25) AND "ikey" IS NOT NULL)
+WHERE ("bkey" = 10 AND "dkey" = 15.15 OR "bkey" = 20 AND "dkey" = 25.25) AND "bkey" IN (10, 20) AND ("dkey" IN (15.15, 25.25) AND "ikey" IS NOT NULL)
             hive.sql.query.fieldNames ikey
             hive.sql.query.fieldTypes int
             hive.sql.query.split true
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_ext_query1.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_ext_query1.q.out
index b2523de9109..c2ac003ea42 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_ext_query1.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_ext_query1.q.out
@@ -6,7 +6,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]): rowcount = ###Masked###, cum
         HiveProject(c_customer_sk=[$0], c_customer_id=[$1]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
           HiveTableScan(table=[[default, customer]], table:alias=[customer]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
         HiveJoin(condition=[=($3, $1)], joinType=[inner], algorithm=[none], cost=[not available]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
-          HiveProject($f0=[$0], $f1=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+          HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
             HiveFilter(condition=[IS NOT NULL($2)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
               HiveAggregate(group=[{0, 1}], agg#0=[sum($2)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
                 HiveJoin(condition=[=($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
@@ -40,7 +40,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]): rowcount = ###Masked###, cum
         HiveProject(c_customer_sk=[$0], c_customer_id=[$1]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
           HiveTableScan(table=[[default, customer]], table:alias=[customer]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
         HiveJoin(condition=[=($3, $1)], joinType=[inner], algorithm=[none], cost=[{572.8904857896465 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
-          HiveProject($f0=[$0], $f1=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+          HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
             HiveFilter(condition=[IS NOT NULL($2)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
               HiveAggregate(group=[{0, 1}], agg#0=[sum($2)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
                 HiveJoin(condition=[=($3, $4)], joinType=[inner], algorithm=[none], cost=[{8.033148661966447E9 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query1.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query1.q.out
index ef163906622..1216481c762 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query1.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query1.q.out
@@ -6,7 +6,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100])
         HiveProject(c_customer_sk=[$0], c_customer_id=[$1])
           HiveTableScan(table=[[default, customer]], table:alias=[customer])
         HiveJoin(condition=[=($3, $1)], joinType=[inner], algorithm=[none], cost=[not available])
-          HiveProject($f0=[$0], $f1=[$1], $f2=[$2])
+          HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2])
             HiveFilter(condition=[IS NOT NULL($2)])
               HiveAggregate(group=[{0, 1}], agg#0=[sum($2)])
                 HiveJoin(condition=[=($3, $4)], joinType=[inner], algorithm=[none], cost=[not available])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query11.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query11.q.out
index f93e90e7639..5af734f10a7 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query11.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query11.q.out
@@ -11,7 +11,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
                   HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available])
                     HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available])
                       HiveProject(ss_customer_sk=[$0], ss_sold_date_sk=[$3], -=[-($2, $1)])
-                        HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))])
+                        HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))])
                           HiveProject(ss_customer_sk=[$2], ss_ext_discount_amt=[$13], ss_ext_list_price=[$16], ss_sold_date_sk=[$22])
                             HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales])
                       HiveProject(d_date_sk=[$0])
@@ -26,7 +26,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
                     HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available])
                       HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available])
                         HiveProject(ss_customer_sk=[$0], ss_sold_date_sk=[$3], -=[-($2, $1)])
-                          HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))])
+                          HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))])
                             HiveProject(ss_customer_sk=[$2], ss_ext_discount_amt=[$13], ss_ext_list_price=[$16], ss_sold_date_sk=[$22])
                               HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales])
                         HiveProject(d_date_sk=[$0])
@@ -40,7 +40,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
                 HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available])
                   HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available])
                     HiveProject(ws_bill_customer_sk=[$0], ws_sold_date_sk=[$3], -=[-($2, $1)])
-                      HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))])
+                      HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))])
                         HiveProject(ws_bill_customer_sk=[$3], ws_ext_discount_amt=[$21], ws_ext_list_price=[$24], ws_sold_date_sk=[$33])
                           HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales])
                     HiveProject(d_date_sk=[$0])
@@ -55,7 +55,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
                 HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available])
                   HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available])
                     HiveProject(ws_bill_customer_sk=[$0], ws_sold_date_sk=[$3], -=[-($2, $1)])
-                      HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))])
+                      HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))])
                         HiveProject(ws_bill_customer_sk=[$3], ws_ext_discount_amt=[$21], ws_ext_list_price=[$24], ws_sold_date_sk=[$33])
                           HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales])
                     HiveProject(d_date_sk=[$0])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query31.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query31.q.out
index 4f8e2f30e15..cf6b0aab079 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query31.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query31.q.out
@@ -42,7 +42,7 @@ HiveProject(ca_county=[$8], d_year=[CAST(2000):INTEGER], web_q1_q2_increase=[/($
               HiveProject(d_date_sk=[$0])
                 HiveFilter(condition=[AND(=($6, 2000), =($10, 2))])
                   HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
-    HiveProject($f0=[$0], $f1=[$1], $f00=[$2], $f10=[$3], $f01=[$4], $f11=[$5])
+    HiveProject(ca_county=[$0], $f1=[$1], ca_county0=[$2], $f10=[$3], ca_county1=[$4], $f11=[$5])
       HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available])
         HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available])
           HiveProject(ca_county=[$0], $f1=[$1])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query33.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query33.q.out
index bbfcf4fce64..61b99c1ec82 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query33.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query33.q.out
@@ -1,8 +1,8 @@
 CBO PLAN:
 HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100])
-  HiveProject($f0=[$0], $f1=[$1])
+  HiveProject(i_manufact_id=[$0], $f1=[$1])
     HiveAggregate(group=[{0}], agg#0=[sum($1)])
-      HiveProject($f0=[$0], $f1=[$1])
+      HiveProject(i_manufact_id=[$0], $f1=[$1])
         HiveUnion(all=[true])
           HiveProject(i_manufact_id=[$0], $f1=[$1])
             HiveAggregate(group=[{10}], agg#0=[sum($2)])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query34.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query34.q.out
index 47a13934edf..1b1c470f644 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query34.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query34.q.out
@@ -4,7 +4,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
     HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available])
       HiveProject(c_customer_sk=[$0], c_salutation=[$7], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10])
         HiveTableScan(table=[[default, customer]], table:alias=[customer])
-      HiveProject($f0=[$0], $f1=[$1], $f2=[$2])
+      HiveProject(ss_ticket_number=[$0], ss_customer_sk=[$1], $f2=[$2])
         HiveFilter(condition=[BETWEEN(false, $2, 15:BIGINT, 20:BIGINT)])
           HiveProject(ss_ticket_number=[$1], ss_customer_sk=[$0], $f2=[$2])
             HiveAggregate(group=[{0, 3}], agg#0=[count()])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query38.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query38.q.out
index 7de5297409b..1d48ae1b6f0 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query38.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query38.q.out
@@ -1,9 +1,9 @@
 CBO PLAN:
 HiveAggregate(group=[{}], agg#0=[count()])
-  HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3])
+  HiveProject(c_last_name=[$0], c_first_name=[$1], d_date=[$2], $f3=[$3])
     HiveFilter(condition=[=($3, 3)])
       HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)])
-        HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3])
+        HiveProject(c_last_name=[$0], c_first_name=[$1], d_date=[$2], $f3=[$3])
           HiveUnion(all=[true])
             HiveProject(c_last_name=[$1], c_first_name=[$0], d_date=[$2], $f3=[$3])
               HiveAggregate(group=[{0, 1, 2}], agg#0=[count()])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query4.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query4.q.out
index 7d291323b55..e84ab82c70e 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query4.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query4.q.out
@@ -10,7 +10,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
                 HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available])
                   HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available])
                     HiveProject(ws_bill_customer_sk=[$0], ws_sold_date_sk=[$5], /=[/(+(-(-($4, $3), $1), $2), 2:DECIMAL(10, 0))])
-                      HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($0))])
+                      HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5))])
                         HiveProject(ws_bill_customer_sk=[$3], ws_ext_discount_amt=[$21], ws_ext_sales_price=[$22], ws_ext_wholesale_cost=[$23], ws_ext_list_price=[$24], ws_sold_date_sk=[$33])
                           HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales])
                     HiveProject(d_date_sk=[$0])
@@ -27,7 +27,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
                       HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available])
                         HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available])
                           HiveProject(ss_customer_sk=[$0], ss_sold_date_sk=[$5], /=[/(+(-(-($4, $3), $1), $2), 2:DECIMAL(10, 0))])
-                            HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($0))])
+                            HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5))])
                               HiveProject(ss_customer_sk=[$2], ss_ext_discount_amt=[$13], ss_ext_sales_price=[$14], ss_ext_wholesale_cost=[$15], ss_ext_list_price=[$16], ss_sold_date_sk=[$22])
                                 HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales])
                           HiveProject(d_date_sk=[$0])
@@ -42,7 +42,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
                         HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available])
                           HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available])
                             HiveProject(ss_customer_sk=[$0], ss_sold_date_sk=[$5], /=[/(+(-(-($4, $3), $1), $2), 2:DECIMAL(10, 0))])
-                              HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($0))])
+                              HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5))])
                                 HiveProject(ss_customer_sk=[$2], ss_ext_discount_amt=[$13], ss_ext_sales_price=[$14], ss_ext_wholesale_cost=[$15], ss_ext_list_price=[$16], ss_sold_date_sk=[$22])
                                   HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales])
                             HiveProject(d_date_sk=[$0])
@@ -56,7 +56,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
                     HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available])
                       HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available])
                         HiveProject(cs_bill_customer_sk=[$0], cs_sold_date_sk=[$5], /=[/(+(-(-($4, $3), $1), $2), 2:DECIMAL(10, 0))])
-                          HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($0))])
+                          HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5))])
                             HiveProject(cs_bill_customer_sk=[$2], cs_ext_discount_amt=[$21], cs_ext_sales_price=[$22], cs_ext_wholesale_cost=[$23], cs_ext_list_price=[$24], cs_sold_date_sk=[$33])
                               HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales])
                         HiveProject(d_date_sk=[$0])
@@ -71,7 +71,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
                     HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available])
                       HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available])
                         HiveProject(cs_bill_customer_sk=[$0], cs_sold_date_sk=[$5], /=[/(+(-(-($4, $3), $1), $2), 2:DECIMAL(10, 0))])
-                          HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($0))])
+                          HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5))])
                             HiveProject(cs_bill_customer_sk=[$2], cs_ext_discount_amt=[$21], cs_ext_sales_price=[$22], cs_ext_wholesale_cost=[$23], cs_ext_list_price=[$24], cs_sold_date_sk=[$33])
                               HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales])
                         HiveProject(d_date_sk=[$0])
@@ -86,7 +86,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
                 HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available])
                   HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available])
                     HiveProject(ws_bill_customer_sk=[$0], ws_sold_date_sk=[$5], /=[/(+(-(-($4, $3), $1), $2), 2:DECIMAL(10, 0))])
-                      HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($0))])
+                      HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5))])
                         HiveProject(ws_bill_customer_sk=[$3], ws_ext_discount_amt=[$21], ws_ext_sales_price=[$22], ws_ext_wholesale_cost=[$23], ws_ext_list_price=[$24], ws_sold_date_sk=[$33])
                           HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales])
                     HiveProject(d_date_sk=[$0])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query54.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query54.q.out
index 10e305058e0..1feeb5e1d87 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query54.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query54.q.out
@@ -33,7 +33,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100])
                             HiveProject($f0=[+($3, 1)])
                               HiveFilter(condition=[AND(=($6, 1999), =($8, 3))])
                                 HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
-              HiveProject(ca_address_sk=[$0], ca_county=[$1], ca_state=[$2], s_county=[$3], s_state=[$4], $f0=[$5], $f1=[$6])
+              HiveProject(ca_address_sk=[$0], ca_county=[$1], ca_state=[$2], s_county=[$3], s_state=[$4], c_customer_sk=[$5], c_current_addr_sk=[$6])
                 HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available])
                   HiveJoin(condition=[AND(=($1, $3), =($2, $4))], joinType=[inner], algorithm=[none], cost=[not available])
                     HiveProject(ca_address_sk=[$0], ca_county=[$7], ca_state=[$8])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query56.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query56.q.out
index 413324af04c..d5229761f43 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query56.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query56.q.out
@@ -1,8 +1,8 @@
 CBO PLAN:
 HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100])
-  HiveProject($f0=[$0], $f1=[$1])
+  HiveProject(i_item_id=[$0], $f1=[$1])
     HiveAggregate(group=[{0}], agg#0=[sum($1)])
-      HiveProject($f0=[$0], $f1=[$1])
+      HiveProject(i_item_id=[$0], $f1=[$1])
         HiveUnion(all=[true])
           HiveProject(i_item_id=[$0], $f1=[$1])
             HiveAggregate(group=[{10}], agg#0=[sum($2)])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query6.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query6.q.out
index 483e67ce9b1..51d018d50ff 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query6.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query6.q.out
@@ -1,7 +1,7 @@
 Warning: Map Join MAPJOIN[168][bigTable=?] in task 'Map 1' is a cross product
 CBO PLAN:
 HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100])
-  HiveProject($f0=[$0], $f1=[$1])
+  HiveProject(ca_state=[$0], $f1=[$1])
     HiveFilter(condition=[>=($1, 10)])
       HiveAggregate(group=[{4}], agg#0=[count()])
         HiveJoin(condition=[=($7, $13)], joinType=[inner], algorithm=[none], cost=[not available])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query60.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query60.q.out
index ab9511801fe..3109d4d558e 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query60.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query60.q.out
@@ -1,8 +1,8 @@
 CBO PLAN:
 HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100])
-  HiveProject($f0=[$0], $f1=[$1])
+  HiveProject(i_item_id=[$0], $f1=[$1])
     HiveAggregate(group=[{0}], agg#0=[sum($1)])
-      HiveProject($f0=[$0], $f1=[$1])
+      HiveProject(i_item_id=[$0], $f1=[$1])
         HiveUnion(all=[true])
           HiveProject(i_item_id=[$0], $f1=[$1])
             HiveAggregate(group=[{10}], agg#0=[sum($2)])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query65.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query65.q.out
index e0f272ac21c..d8f2a2f16af 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query65.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query65.q.out
@@ -6,7 +6,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100])
         HiveProject(s_store_sk=[$0], s_store_name=[$5])
           HiveTableScan(table=[[default, store]], table:alias=[store])
         HiveJoin(condition=[AND(=($3, $0), <=($2, $4))], joinType=[inner], algorithm=[none], cost=[not available])
-          HiveProject($f0=[$0], $f1=[$1], $f2=[$2])
+          HiveProject(ss_store_sk=[$0], ss_item_sk=[$1], $f2=[$2])
             HiveFilter(condition=[IS NOT NULL($2)])
               HiveProject(ss_store_sk=[$1], ss_item_sk=[$0], $f2=[$2])
                 HiveAggregate(group=[{0, 1}], agg#0=[sum($2)])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query73.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query73.q.out
index bf473fb5007..03c2ec79256 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query73.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query73.q.out
@@ -4,7 +4,7 @@ HiveSortLimit(sort0=[$5], dir0=[DESC])
     HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available])
       HiveProject(c_customer_sk=[$0], c_salutation=[$7], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10])
         HiveTableScan(table=[[default, customer]], table:alias=[customer])
-      HiveProject($f0=[$0], $f1=[$1], $f2=[$2])
+      HiveProject(ss_ticket_number=[$0], ss_customer_sk=[$1], $f2=[$2])
         HiveFilter(condition=[BETWEEN(false, $2, 1:BIGINT, 5:BIGINT)])
           HiveProject(ss_ticket_number=[$1], ss_customer_sk=[$0], $f2=[$2])
             HiveAggregate(group=[{0, 3}], agg#0=[count()])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query78.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query78.q.out
index 20c5d34854f..f1ef1809e23 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query78.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query78.q.out
@@ -32,7 +32,7 @@ HiveProject(ss_sold_year=[CAST(2000):INTEGER], ss_item_sk=[$0], ss_customer_sk=[
                             HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales])
                         HiveProject(cr_item_sk=[$1], cr_order_number=[$15])
                           HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns])
-        HiveProject($f1=[$0], $f2=[$1], $f2_0=[$2], $f3=[$3], $f4=[$4])
+        HiveProject(ws_item_sk=[$0], ws_bill_customer_sk=[$1], $f2=[$2], $f3=[$3], $f4=[$4])
           HiveFilter(condition=[>($2, 0)])
             HiveAggregate(group=[{1, 2}], agg#0=[sum($3)], agg#1=[sum($4)], agg#2=[sum($5)])
               HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query81.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query81.q.out
index fc50acee081..d1bdb18108d 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query81.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query81.q.out
@@ -11,7 +11,7 @@ HiveProject(c_customer_id=[$0], c_salutation=[$1], c_first_name=[$2], c_last_nam
             HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_street_type=[$4], ca_suite_number=[$5], ca_city=[$6], ca_county=[$7], ca_zip=[$9], ca_country=[$10], ca_gmt_offset=[$11], ca_location_type=[$12])
               HiveFilter(condition=[=($8, _UTF-16LE'IL')])
                 HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address])
-          HiveProject($f0=[$0], $f1=[$1], $f2=[$2])
+          HiveProject(cr_returning_customer_sk=[$0], ca_state=[$1], $f2=[$2])
             HiveFilter(condition=[IS NOT NULL($2)])
               HiveProject(cr_returning_customer_sk=[$1], ca_state=[$0], $f2=[$2])
                 HiveAggregate(group=[{1, 2}], agg#0=[sum($4)])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query11.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query11.q.out
index ab43070d7fa..b3ae8c6ce6c 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query11.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query11.q.out
@@ -206,7 +206,7 @@ STAGE PLANS:
                     outputColumnNames: _col0, _col1, _col2, _col3
                     Statistics: Num rows: 21600036511 Data size: 5182756360536 Basic stats: COMPLETE Column stats: COMPLETE
                     Filter Operator
-                      predicate: (_col3 is not null and _col0 is not null) (type: boolean)
+                      predicate: (_col0 is not null and _col3 is not null) (type: boolean)
                       Statistics: Num rows: 21594643099 Data size: 5181462254384 Basic stats: COMPLETE Column stats: COMPLETE
                       Select Operator
                         expressions: _col0 (type: bigint), _col3 (type: bigint), (_col2 - _col1) (type: decimal(8,2))
@@ -295,7 +295,7 @@ STAGE PLANS:
                     outputColumnNames: _col0, _col1, _col2, _col3
                     Statistics: Num rows: 86404891377 Data size: 19834337697608 Basic stats: COMPLETE Column stats: COMPLETE
                     Filter Operator
-                      predicate: (_col3 is not null and _col0 is not null) (type: boolean)
+                      predicate: (_col0 is not null and _col3 is not null) (type: boolean)
                       Statistics: Num rows: 82514936083 Data size: 18941394188296 Basic stats: COMPLETE Column stats: COMPLETE
                       Select Operator
                         expressions: _col0 (type: bigint), _col3 (type: bigint), (_col2 - _col1) (type: decimal(8,2))
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query4.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query4.q.out
index a430a23d8f5..d2d87dc9f22 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query4.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query4.q.out
@@ -44,7 +44,7 @@ STAGE PLANS:
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                     Statistics: Num rows: 86404891377 Data size: 38316552569400 Basic stats: COMPLETE Column stats: COMPLETE
                     Filter Operator
-                      predicate: (_col5 is not null and _col0 is not null) (type: boolean)
+                      predicate: (_col0 is not null and _col5 is not null) (type: boolean)
                       Statistics: Num rows: 82514936083 Data size: 36591538231240 Basic stats: COMPLETE Column stats: COMPLETE
                       Select Operator
                         expressions: _col0 (type: bigint), _col5 (type: bigint), ((((_col4 - _col3) - _col1) + _col2) / 2) (type: decimal(14,6))
@@ -96,7 +96,7 @@ STAGE PLANS:
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                     Statistics: Num rows: 43220864887 Data size: 19956340213184 Basic stats: COMPLETE Column stats: COMPLETE
                     Filter Operator
-                      predicate: (_col5 is not null and _col0 is not null) (type: boolean)
+                      predicate: (_col0 is not null and _col5 is not null) (type: boolean)
                       Statistics: Num rows: 43007130172 Data size: 19857652630296 Basic stats: COMPLETE Column stats: COMPLETE
                       Select Operator
                         expressions: _col0 (type: bigint), _col5 (type: bigint), ((((_col4 - _col3) - _col1) + _col2) / 2) (type: decimal(14,6))
@@ -205,7 +205,7 @@ STAGE PLANS:
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                     Statistics: Num rows: 21600036511 Data size: 10019954898456 Basic stats: COMPLETE Column stats: COMPLETE
                     Filter Operator
-                      predicate: (_col5 is not null and _col0 is not null) (type: boolean)
+                      predicate: (_col0 is not null and _col5 is not null) (type: boolean)
                       Statistics: Num rows: 21594643099 Data size: 10017452970080 Basic stats: COMPLETE Column stats: COMPLETE
                       Select Operator
                         expressions: _col0 (type: bigint), _col5 (type: bigint), ((((_col4 - _col3) - _col1) + _col2) / 2) (type: decimal(14,6))