You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by za...@apache.org on 2022/06/10 13:08:39 UTC
[hive] branch master updated: HIVE-26238: Decouple sort filter predicates optimization from digest normalization in CBO (Stamatis Zampetakis, reviewed by Zoltan Haindrich)
This is an automated email from the ASF dual-hosted git repository.
zabetak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new f29cb2245c9 HIVE-26238: Decouple sort filter predicates optimization from digest normalization in CBO (Stamatis Zampetakis, reviewed by Zoltan Haindrich)
f29cb2245c9 is described below
commit f29cb2245c97102975ea0dd73783049eaa0947a0
Author: Stamatis Zampetakis <za...@gmail.com>
AuthorDate: Tue May 17 15:20:06 2022 +0200
HIVE-26238: Decouple sort filter predicates optimization from digest normalization in CBO (Stamatis Zampetakis, reviewed by Zoltan Haindrich)
1. Decouple sort filter optimization from digest normalization by
refactoring HiveFilterSortPredicates into a (DFS) visitor. We cannot
use the planner or rules because they make use of the digest. Performing
this optimization using a visitor also slightly simplifies the code:
there is no need for a registry, since we are not going to visit the
same node twice.
2. Move the optimization after all post-join transformations to avoid
having other optimizations cancel the benefit of the sort filter
predicates.
Closes #3299
---
.../calcite/rules/HiveFilterSortPredicates.java | 47 +++++++---------------
.../hadoop/hive/ql/parse/CalcitePlanner.java | 8 ++--
.../clientpositive/llap/external_jdbc_table2.q.out | 2 +-
.../perf/tpcds30tb/tez/cbo_ext_query1.q.out | 4 +-
.../perf/tpcds30tb/tez/cbo_query1.q.out | 2 +-
.../perf/tpcds30tb/tez/cbo_query11.q.out | 8 ++--
.../perf/tpcds30tb/tez/cbo_query31.q.out | 2 +-
.../perf/tpcds30tb/tez/cbo_query33.q.out | 4 +-
.../perf/tpcds30tb/tez/cbo_query34.q.out | 2 +-
.../perf/tpcds30tb/tez/cbo_query38.q.out | 4 +-
.../perf/tpcds30tb/tez/cbo_query4.q.out | 12 +++---
.../perf/tpcds30tb/tez/cbo_query54.q.out | 2 +-
.../perf/tpcds30tb/tez/cbo_query56.q.out | 4 +-
.../perf/tpcds30tb/tez/cbo_query6.q.out | 2 +-
.../perf/tpcds30tb/tez/cbo_query60.q.out | 4 +-
.../perf/tpcds30tb/tez/cbo_query65.q.out | 2 +-
.../perf/tpcds30tb/tez/cbo_query73.q.out | 2 +-
.../perf/tpcds30tb/tez/cbo_query78.q.out | 2 +-
.../perf/tpcds30tb/tez/cbo_query81.q.out | 2 +-
.../perf/tpcds30tb/tez/query11.q.out | 4 +-
.../clientpositive/perf/tpcds30tb/tez/query4.q.out | 6 +--
21 files changed, 52 insertions(+), 73 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSortPredicates.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSortPredicates.java
index 780481f2fd5..6ecf94b5f63 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSortPredicates.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSortPredicates.java
@@ -20,8 +20,7 @@ import java.util.Comparator;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
-import org.apache.calcite.plan.RelOptRule;
-import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.rel.RelHomogeneousShuttle;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Filter;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
@@ -42,49 +41,34 @@ import org.slf4j.LoggerFactory;
/**
- * Rule that sorts conditions in a filter predicate to accelerate query processing
+ * Sorts conditions in a filter predicate to accelerate query processing
* based on selectivity and compute cost. Currently it is not applied recursively,
* i.e., it is only applied to top predicates in the condition.
*/
-public class HiveFilterSortPredicates extends RelOptRule {
+public class HiveFilterSortPredicates extends RelHomogeneousShuttle {
private static final Logger LOG = LoggerFactory.getLogger(HiveFilterSortPredicates.class);
private final AtomicInteger noColsMissingStats;
public HiveFilterSortPredicates(AtomicInteger noColsMissingStats) {
- super(
- operand(Filter.class,
- operand(RelNode.class, any())));
this.noColsMissingStats = noColsMissingStats;
}
@Override
- public boolean matches(RelOptRuleCall call) {
- final Filter filter = call.rel(0);
-
- HiveRulesRegistry registry = call.getPlanner().getContext().unwrap(HiveRulesRegistry.class);
-
- // If this operator has been visited already by the rule,
- // we do not need to apply the optimization
- if (registry != null && registry.getVisited(this).contains(filter)) {
- return false;
+ public RelNode visit(RelNode other) {
+ RelNode visitedNode = super.visit(other);
+ if (visitedNode instanceof Filter) {
+ return rewriteFilter((Filter) visitedNode);
}
- return true;
+ return visitedNode;
}
- @Override
- public void onMatch(RelOptRuleCall call) {
+ private RelNode rewriteFilter(Filter matchNode) {
try {
- final Filter filter = call.rel(0);
- final RelNode input = call.rel(1);
-
- // Register that we have visited this operator in this rule
- HiveRulesRegistry registry = call.getPlanner().getContext().unwrap(HiveRulesRegistry.class);
- if (registry != null) {
- registry.registerVisited(this, filter);
- }
+ final Filter filter = matchNode;
+ final RelNode input = filter.getInput();
final RexNode originalCond = filter.getCondition();
final RexSortPredicatesShuttle sortPredicatesShuttle = new RexSortPredicatesShuttle(
@@ -92,16 +76,12 @@ public class HiveFilterSortPredicates extends RelOptRule {
final RexNode newCond = originalCond.accept(sortPredicatesShuttle);
if (!sortPredicatesShuttle.modified) {
// We are done, bail out
- return;
+ return matchNode;
}
// We register the new filter so we do not fire the rule on it again
final Filter newFilter = filter.copy(filter.getTraitSet(), input, newCond);
- if (registry != null) {
- registry.registerVisited(this, newFilter);
- }
-
- call.transformTo(newFilter);
+ return newFilter;
}
catch (Exception e) {
if (noColsMissingStats.get() > 0) {
@@ -111,6 +91,7 @@ public class HiveFilterSortPredicates extends RelOptRule {
throw e;
}
}
+ return matchNode;
}
/**
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 47fd2048781..765e2e46463 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -1757,6 +1757,9 @@ public class CalcitePlanner extends SemanticAnalyzer {
// 5. Apply post-join order optimizations
calcitePlan = applyPostJoinOrderingTransform(calcitePlan, mdProvider.getMetadataProvider(), executorProvider);
+ if (conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_SORT_PREDS_WITH_STATS)) {
+ calcitePlan = calcitePlan.accept(new HiveFilterSortPredicates(noColsMissingStats));
+ }
if (LOG.isDebugEnabled()) {
LOG.debug("Plan after post-join transformations:\n" + RelOptUtil.toString(calcitePlan));
}
@@ -2265,11 +2268,6 @@ public class CalcitePlanner extends SemanticAnalyzer {
generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
HiveWindowingLastValueRewrite.INSTANCE);
}
- // 6. Sort predicates in filter expressions
- if (conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_SORT_PREDS_WITH_STATS)) {
- generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
- new HiveFilterSortPredicates(noColsMissingStats));
- }
// 7. Apply Druid transformation rules
generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
diff --git a/ql/src/test/results/clientpositive/llap/external_jdbc_table2.q.out b/ql/src/test/results/clientpositive/llap/external_jdbc_table2.q.out
index c84efa0c6d5..1802545b44b 100644
--- a/ql/src/test/results/clientpositive/llap/external_jdbc_table2.q.out
+++ b/ql/src/test/results/clientpositive/llap/external_jdbc_table2.q.out
@@ -715,7 +715,7 @@ STAGE PLANS:
properties:
hive.sql.query SELECT "ikey"
FROM "EXTERNAL_JDBC_SIMPLE_DERBY2_TABLE1"
-WHERE "bkey" IN (10, 20) AND "dkey" IN (15.15, 25.25) AND (("bkey" = 10 AND "dkey" = 15.15 OR "bkey" = 20 AND "dkey" = 25.25) AND "ikey" IS NOT NULL)
+WHERE ("bkey" = 10 AND "dkey" = 15.15 OR "bkey" = 20 AND "dkey" = 25.25) AND "bkey" IN (10, 20) AND ("dkey" IN (15.15, 25.25) AND "ikey" IS NOT NULL)
hive.sql.query.fieldNames ikey
hive.sql.query.fieldTypes int
hive.sql.query.split true
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_ext_query1.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_ext_query1.q.out
index b2523de9109..c2ac003ea42 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_ext_query1.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_ext_query1.q.out
@@ -6,7 +6,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]): rowcount = ###Masked###, cum
HiveProject(c_customer_sk=[$0], c_customer_id=[$1]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
HiveTableScan(table=[[default, customer]], table:alias=[customer]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
HiveJoin(condition=[=($3, $1)], joinType=[inner], algorithm=[none], cost=[not available]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
- HiveProject($f0=[$0], $f1=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
HiveFilter(condition=[IS NOT NULL($2)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
HiveAggregate(group=[{0, 1}], agg#0=[sum($2)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
HiveJoin(condition=[=($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
@@ -40,7 +40,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]): rowcount = ###Masked###, cum
HiveProject(c_customer_sk=[$0], c_customer_id=[$1]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
HiveTableScan(table=[[default, customer]], table:alias=[customer]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
HiveJoin(condition=[=($3, $1)], joinType=[inner], algorithm=[none], cost=[{572.8904857896465 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
- HiveProject($f0=[$0], $f1=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
HiveFilter(condition=[IS NOT NULL($2)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
HiveAggregate(group=[{0, 1}], agg#0=[sum($2)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
HiveJoin(condition=[=($3, $4)], joinType=[inner], algorithm=[none], cost=[{8.033148661966447E9 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query1.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query1.q.out
index ef163906622..1216481c762 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query1.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query1.q.out
@@ -6,7 +6,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100])
HiveProject(c_customer_sk=[$0], c_customer_id=[$1])
HiveTableScan(table=[[default, customer]], table:alias=[customer])
HiveJoin(condition=[=($3, $1)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject($f0=[$0], $f1=[$1], $f2=[$2])
+ HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2])
HiveFilter(condition=[IS NOT NULL($2)])
HiveAggregate(group=[{0, 1}], agg#0=[sum($2)])
HiveJoin(condition=[=($3, $4)], joinType=[inner], algorithm=[none], cost=[not available])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query11.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query11.q.out
index f93e90e7639..5af734f10a7 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query11.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query11.q.out
@@ -11,7 +11,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(ss_customer_sk=[$0], ss_sold_date_sk=[$3], -=[-($2, $1)])
- HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))])
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))])
HiveProject(ss_customer_sk=[$2], ss_ext_discount_amt=[$13], ss_ext_list_price=[$16], ss_sold_date_sk=[$22])
HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales])
HiveProject(d_date_sk=[$0])
@@ -26,7 +26,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(ss_customer_sk=[$0], ss_sold_date_sk=[$3], -=[-($2, $1)])
- HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))])
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))])
HiveProject(ss_customer_sk=[$2], ss_ext_discount_amt=[$13], ss_ext_list_price=[$16], ss_sold_date_sk=[$22])
HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales])
HiveProject(d_date_sk=[$0])
@@ -40,7 +40,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(ws_bill_customer_sk=[$0], ws_sold_date_sk=[$3], -=[-($2, $1)])
- HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))])
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))])
HiveProject(ws_bill_customer_sk=[$3], ws_ext_discount_amt=[$21], ws_ext_list_price=[$24], ws_sold_date_sk=[$33])
HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales])
HiveProject(d_date_sk=[$0])
@@ -55,7 +55,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(ws_bill_customer_sk=[$0], ws_sold_date_sk=[$3], -=[-($2, $1)])
- HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))])
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))])
HiveProject(ws_bill_customer_sk=[$3], ws_ext_discount_amt=[$21], ws_ext_list_price=[$24], ws_sold_date_sk=[$33])
HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales])
HiveProject(d_date_sk=[$0])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query31.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query31.q.out
index 4f8e2f30e15..cf6b0aab079 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query31.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query31.q.out
@@ -42,7 +42,7 @@ HiveProject(ca_county=[$8], d_year=[CAST(2000):INTEGER], web_q1_q2_increase=[/($
HiveProject(d_date_sk=[$0])
HiveFilter(condition=[AND(=($6, 2000), =($10, 2))])
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
- HiveProject($f0=[$0], $f1=[$1], $f00=[$2], $f10=[$3], $f01=[$4], $f11=[$5])
+ HiveProject(ca_county=[$0], $f1=[$1], ca_county0=[$2], $f10=[$3], ca_county1=[$4], $f11=[$5])
HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(ca_county=[$0], $f1=[$1])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query33.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query33.q.out
index bbfcf4fce64..61b99c1ec82 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query33.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query33.q.out
@@ -1,8 +1,8 @@
CBO PLAN:
HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100])
- HiveProject($f0=[$0], $f1=[$1])
+ HiveProject(i_manufact_id=[$0], $f1=[$1])
HiveAggregate(group=[{0}], agg#0=[sum($1)])
- HiveProject($f0=[$0], $f1=[$1])
+ HiveProject(i_manufact_id=[$0], $f1=[$1])
HiveUnion(all=[true])
HiveProject(i_manufact_id=[$0], $f1=[$1])
HiveAggregate(group=[{10}], agg#0=[sum($2)])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query34.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query34.q.out
index 47a13934edf..1b1c470f644 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query34.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query34.q.out
@@ -4,7 +4,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(c_customer_sk=[$0], c_salutation=[$7], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10])
HiveTableScan(table=[[default, customer]], table:alias=[customer])
- HiveProject($f0=[$0], $f1=[$1], $f2=[$2])
+ HiveProject(ss_ticket_number=[$0], ss_customer_sk=[$1], $f2=[$2])
HiveFilter(condition=[BETWEEN(false, $2, 15:BIGINT, 20:BIGINT)])
HiveProject(ss_ticket_number=[$1], ss_customer_sk=[$0], $f2=[$2])
HiveAggregate(group=[{0, 3}], agg#0=[count()])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query38.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query38.q.out
index 7de5297409b..1d48ae1b6f0 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query38.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query38.q.out
@@ -1,9 +1,9 @@
CBO PLAN:
HiveAggregate(group=[{}], agg#0=[count()])
- HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3])
+ HiveProject(c_last_name=[$0], c_first_name=[$1], d_date=[$2], $f3=[$3])
HiveFilter(condition=[=($3, 3)])
HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)])
- HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3])
+ HiveProject(c_last_name=[$0], c_first_name=[$1], d_date=[$2], $f3=[$3])
HiveUnion(all=[true])
HiveProject(c_last_name=[$1], c_first_name=[$0], d_date=[$2], $f3=[$3])
HiveAggregate(group=[{0, 1, 2}], agg#0=[count()])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query4.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query4.q.out
index 7d291323b55..e84ab82c70e 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query4.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query4.q.out
@@ -10,7 +10,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(ws_bill_customer_sk=[$0], ws_sold_date_sk=[$5], /=[/(+(-(-($4, $3), $1), $2), 2:DECIMAL(10, 0))])
- HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($0))])
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5))])
HiveProject(ws_bill_customer_sk=[$3], ws_ext_discount_amt=[$21], ws_ext_sales_price=[$22], ws_ext_wholesale_cost=[$23], ws_ext_list_price=[$24], ws_sold_date_sk=[$33])
HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales])
HiveProject(d_date_sk=[$0])
@@ -27,7 +27,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(ss_customer_sk=[$0], ss_sold_date_sk=[$5], /=[/(+(-(-($4, $3), $1), $2), 2:DECIMAL(10, 0))])
- HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($0))])
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5))])
HiveProject(ss_customer_sk=[$2], ss_ext_discount_amt=[$13], ss_ext_sales_price=[$14], ss_ext_wholesale_cost=[$15], ss_ext_list_price=[$16], ss_sold_date_sk=[$22])
HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales])
HiveProject(d_date_sk=[$0])
@@ -42,7 +42,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(ss_customer_sk=[$0], ss_sold_date_sk=[$5], /=[/(+(-(-($4, $3), $1), $2), 2:DECIMAL(10, 0))])
- HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($0))])
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5))])
HiveProject(ss_customer_sk=[$2], ss_ext_discount_amt=[$13], ss_ext_sales_price=[$14], ss_ext_wholesale_cost=[$15], ss_ext_list_price=[$16], ss_sold_date_sk=[$22])
HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales])
HiveProject(d_date_sk=[$0])
@@ -56,7 +56,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(cs_bill_customer_sk=[$0], cs_sold_date_sk=[$5], /=[/(+(-(-($4, $3), $1), $2), 2:DECIMAL(10, 0))])
- HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($0))])
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5))])
HiveProject(cs_bill_customer_sk=[$2], cs_ext_discount_amt=[$21], cs_ext_sales_price=[$22], cs_ext_wholesale_cost=[$23], cs_ext_list_price=[$24], cs_sold_date_sk=[$33])
HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales])
HiveProject(d_date_sk=[$0])
@@ -71,7 +71,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(cs_bill_customer_sk=[$0], cs_sold_date_sk=[$5], /=[/(+(-(-($4, $3), $1), $2), 2:DECIMAL(10, 0))])
- HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($0))])
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5))])
HiveProject(cs_bill_customer_sk=[$2], cs_ext_discount_amt=[$21], cs_ext_sales_price=[$22], cs_ext_wholesale_cost=[$23], cs_ext_list_price=[$24], cs_sold_date_sk=[$33])
HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales])
HiveProject(d_date_sk=[$0])
@@ -86,7 +86,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(ws_bill_customer_sk=[$0], ws_sold_date_sk=[$5], /=[/(+(-(-($4, $3), $1), $2), 2:DECIMAL(10, 0))])
- HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($0))])
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5))])
HiveProject(ws_bill_customer_sk=[$3], ws_ext_discount_amt=[$21], ws_ext_sales_price=[$22], ws_ext_wholesale_cost=[$23], ws_ext_list_price=[$24], ws_sold_date_sk=[$33])
HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales])
HiveProject(d_date_sk=[$0])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query54.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query54.q.out
index 10e305058e0..1feeb5e1d87 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query54.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query54.q.out
@@ -33,7 +33,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100])
HiveProject($f0=[+($3, 1)])
HiveFilter(condition=[AND(=($6, 1999), =($8, 3))])
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
- HiveProject(ca_address_sk=[$0], ca_county=[$1], ca_state=[$2], s_county=[$3], s_state=[$4], $f0=[$5], $f1=[$6])
+ HiveProject(ca_address_sk=[$0], ca_county=[$1], ca_state=[$2], s_county=[$3], s_state=[$4], c_customer_sk=[$5], c_current_addr_sk=[$6])
HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available])
HiveJoin(condition=[AND(=($1, $3), =($2, $4))], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(ca_address_sk=[$0], ca_county=[$7], ca_state=[$8])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query56.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query56.q.out
index 413324af04c..d5229761f43 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query56.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query56.q.out
@@ -1,8 +1,8 @@
CBO PLAN:
HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100])
- HiveProject($f0=[$0], $f1=[$1])
+ HiveProject(i_item_id=[$0], $f1=[$1])
HiveAggregate(group=[{0}], agg#0=[sum($1)])
- HiveProject($f0=[$0], $f1=[$1])
+ HiveProject(i_item_id=[$0], $f1=[$1])
HiveUnion(all=[true])
HiveProject(i_item_id=[$0], $f1=[$1])
HiveAggregate(group=[{10}], agg#0=[sum($2)])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query6.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query6.q.out
index 483e67ce9b1..51d018d50ff 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query6.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query6.q.out
@@ -1,7 +1,7 @@
Warning: Map Join MAPJOIN[168][bigTable=?] in task 'Map 1' is a cross product
CBO PLAN:
HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100])
- HiveProject($f0=[$0], $f1=[$1])
+ HiveProject(ca_state=[$0], $f1=[$1])
HiveFilter(condition=[>=($1, 10)])
HiveAggregate(group=[{4}], agg#0=[count()])
HiveJoin(condition=[=($7, $13)], joinType=[inner], algorithm=[none], cost=[not available])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query60.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query60.q.out
index ab9511801fe..3109d4d558e 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query60.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query60.q.out
@@ -1,8 +1,8 @@
CBO PLAN:
HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100])
- HiveProject($f0=[$0], $f1=[$1])
+ HiveProject(i_item_id=[$0], $f1=[$1])
HiveAggregate(group=[{0}], agg#0=[sum($1)])
- HiveProject($f0=[$0], $f1=[$1])
+ HiveProject(i_item_id=[$0], $f1=[$1])
HiveUnion(all=[true])
HiveProject(i_item_id=[$0], $f1=[$1])
HiveAggregate(group=[{10}], agg#0=[sum($2)])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query65.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query65.q.out
index e0f272ac21c..d8f2a2f16af 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query65.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query65.q.out
@@ -6,7 +6,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100])
HiveProject(s_store_sk=[$0], s_store_name=[$5])
HiveTableScan(table=[[default, store]], table:alias=[store])
HiveJoin(condition=[AND(=($3, $0), <=($2, $4))], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject($f0=[$0], $f1=[$1], $f2=[$2])
+ HiveProject(ss_store_sk=[$0], ss_item_sk=[$1], $f2=[$2])
HiveFilter(condition=[IS NOT NULL($2)])
HiveProject(ss_store_sk=[$1], ss_item_sk=[$0], $f2=[$2])
HiveAggregate(group=[{0, 1}], agg#0=[sum($2)])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query73.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query73.q.out
index bf473fb5007..03c2ec79256 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query73.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query73.q.out
@@ -4,7 +4,7 @@ HiveSortLimit(sort0=[$5], dir0=[DESC])
HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(c_customer_sk=[$0], c_salutation=[$7], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10])
HiveTableScan(table=[[default, customer]], table:alias=[customer])
- HiveProject($f0=[$0], $f1=[$1], $f2=[$2])
+ HiveProject(ss_ticket_number=[$0], ss_customer_sk=[$1], $f2=[$2])
HiveFilter(condition=[BETWEEN(false, $2, 1:BIGINT, 5:BIGINT)])
HiveProject(ss_ticket_number=[$1], ss_customer_sk=[$0], $f2=[$2])
HiveAggregate(group=[{0, 3}], agg#0=[count()])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query78.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query78.q.out
index 20c5d34854f..f1ef1809e23 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query78.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query78.q.out
@@ -32,7 +32,7 @@ HiveProject(ss_sold_year=[CAST(2000):INTEGER], ss_item_sk=[$0], ss_customer_sk=[
HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales])
HiveProject(cr_item_sk=[$1], cr_order_number=[$15])
HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns])
- HiveProject($f1=[$0], $f2=[$1], $f2_0=[$2], $f3=[$3], $f4=[$4])
+ HiveProject(ws_item_sk=[$0], ws_bill_customer_sk=[$1], $f2=[$2], $f3=[$3], $f4=[$4])
HiveFilter(condition=[>($2, 0)])
HiveAggregate(group=[{1, 2}], agg#0=[sum($3)], agg#1=[sum($4)], agg#2=[sum($5)])
HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query81.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query81.q.out
index fc50acee081..d1bdb18108d 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query81.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query81.q.out
@@ -11,7 +11,7 @@ HiveProject(c_customer_id=[$0], c_salutation=[$1], c_first_name=[$2], c_last_nam
HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_street_type=[$4], ca_suite_number=[$5], ca_city=[$6], ca_county=[$7], ca_zip=[$9], ca_country=[$10], ca_gmt_offset=[$11], ca_location_type=[$12])
HiveFilter(condition=[=($8, _UTF-16LE'IL')])
HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address])
- HiveProject($f0=[$0], $f1=[$1], $f2=[$2])
+ HiveProject(cr_returning_customer_sk=[$0], ca_state=[$1], $f2=[$2])
HiveFilter(condition=[IS NOT NULL($2)])
HiveProject(cr_returning_customer_sk=[$1], ca_state=[$0], $f2=[$2])
HiveAggregate(group=[{1, 2}], agg#0=[sum($4)])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query11.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query11.q.out
index ab43070d7fa..b3ae8c6ce6c 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query11.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query11.q.out
@@ -206,7 +206,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 21600036511 Data size: 5182756360536 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (_col3 is not null and _col0 is not null) (type: boolean)
+ predicate: (_col0 is not null and _col3 is not null) (type: boolean)
Statistics: Num rows: 21594643099 Data size: 5181462254384 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: bigint), _col3 (type: bigint), (_col2 - _col1) (type: decimal(8,2))
@@ -295,7 +295,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 86404891377 Data size: 19834337697608 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (_col3 is not null and _col0 is not null) (type: boolean)
+ predicate: (_col0 is not null and _col3 is not null) (type: boolean)
Statistics: Num rows: 82514936083 Data size: 18941394188296 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: bigint), _col3 (type: bigint), (_col2 - _col1) (type: decimal(8,2))
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query4.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query4.q.out
index a430a23d8f5..d2d87dc9f22 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query4.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query4.q.out
@@ -44,7 +44,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 86404891377 Data size: 38316552569400 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (_col5 is not null and _col0 is not null) (type: boolean)
+ predicate: (_col0 is not null and _col5 is not null) (type: boolean)
Statistics: Num rows: 82514936083 Data size: 36591538231240 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: bigint), _col5 (type: bigint), ((((_col4 - _col3) - _col1) + _col2) / 2) (type: decimal(14,6))
@@ -96,7 +96,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 43220864887 Data size: 19956340213184 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (_col5 is not null and _col0 is not null) (type: boolean)
+ predicate: (_col0 is not null and _col5 is not null) (type: boolean)
Statistics: Num rows: 43007130172 Data size: 19857652630296 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: bigint), _col5 (type: bigint), ((((_col4 - _col3) - _col1) + _col2) / 2) (type: decimal(14,6))
@@ -205,7 +205,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 21600036511 Data size: 10019954898456 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (_col5 is not null and _col0 is not null) (type: boolean)
+ predicate: (_col0 is not null and _col5 is not null) (type: boolean)
Statistics: Num rows: 21594643099 Data size: 10017452970080 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: bigint), _col5 (type: bigint), ((((_col4 - _col3) - _col1) + _col2) / 2) (type: decimal(14,6))