You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2019/10/20 01:14:28 UTC
[hive] branch master updated: HIVE-21365: Refactor Hep planner steps in CBO (Jesus Camacho Rodriguez, reviewed by Vineet Garg)
This is an automated email from the ASF dual-hosted git repository.
jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new c1e5fa3 HIVE-21365: Refactor Hep planner steps in CBO (Jesus Camacho Rodriguez, reviewed by Vineet Garg)
c1e5fa3 is described below
commit c1e5fa3d80511ac625476b0a9c454783bd6c4ad1
Author: Jesus Camacho Rodriguez <jc...@apache.org>
AuthorDate: Fri Mar 1 14:14:59 2019 -0800
HIVE-21365: Refactor Hep planner steps in CBO (Jesus Camacho Rodriguez, reviewed by Vineet Garg)
Close apache/hive#630
---
.../hive/ql/optimizer/calcite/RelOptHiveTable.java | 40 +-
.../calcite/reloperators/HiveTableScan.java | 4 +-
.../rules/HiveAggregateJoinTransposeRule.java | 410 ++++++++--------
.../calcite/rules/HiveFieldTrimmerRule.java | 93 ++++
.../calcite/rules/HiveFilterSortPredicates.java | 61 ++-
.../calcite/stats/EstimateUniqueKeys.java | 3 +-
.../ql/optimizer/calcite/stats/HiveRelMdSize.java | 11 +-
.../hadoop/hive/ql/parse/CalcitePlanner.java | 541 ++++++++++-----------
.../clientpositive/perf/tez/cbo_ext_query1.q.out | 4 +-
.../clientpositive/perf/tez/cbo_query1.q.out | 2 +-
.../clientpositive/perf/tez/cbo_query14.q.out | 12 +-
.../clientpositive/perf/tez/cbo_query23.q.out | 12 +-
.../clientpositive/perf/tez/cbo_query24.q.out | 2 +-
.../clientpositive/perf/tez/cbo_query30.q.out | 4 +-
.../clientpositive/perf/tez/cbo_query31.q.out | 2 +-
.../clientpositive/perf/tez/cbo_query33.q.out | 4 +-
.../clientpositive/perf/tez/cbo_query34.q.out | 2 +-
.../clientpositive/perf/tez/cbo_query38.q.out | 4 +-
.../clientpositive/perf/tez/cbo_query54.q.out | 2 +-
.../clientpositive/perf/tez/cbo_query56.q.out | 4 +-
.../clientpositive/perf/tez/cbo_query60.q.out | 4 +-
.../clientpositive/perf/tez/cbo_query65.q.out | 2 +-
.../clientpositive/perf/tez/cbo_query73.q.out | 2 +-
.../clientpositive/perf/tez/cbo_query78.q.out | 2 +-
.../clientpositive/perf/tez/cbo_query81.q.out | 4 +-
.../perf/tez/constraints/cbo_ext_query1.q.out | 4 +-
.../perf/tez/constraints/cbo_query1.q.out | 2 +-
.../perf/tez/constraints/cbo_query14.q.out | 6 +-
.../perf/tez/constraints/cbo_query23.q.out | 4 +-
.../perf/tez/constraints/cbo_query24.q.out | 2 +-
.../perf/tez/constraints/cbo_query30.q.out | 4 +-
.../perf/tez/constraints/cbo_query31.q.out | 2 +-
.../perf/tez/constraints/cbo_query33.q.out | 4 +-
.../perf/tez/constraints/cbo_query34.q.out | 2 +-
.../perf/tez/constraints/cbo_query38.q.out | 4 +-
.../perf/tez/constraints/cbo_query54.q.out | 2 +-
.../perf/tez/constraints/cbo_query56.q.out | 4 +-
.../perf/tez/constraints/cbo_query6.q.out | 2 +-
.../perf/tez/constraints/cbo_query60.q.out | 4 +-
.../perf/tez/constraints/cbo_query65.q.out | 2 +-
.../perf/tez/constraints/cbo_query73.q.out | 2 +-
.../perf/tez/constraints/cbo_query78.q.out | 2 +-
.../perf/tez/constraints/cbo_query81.q.out | 4 +-
43 files changed, 683 insertions(+), 604 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
index 0c5140b..001156a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
@@ -679,34 +679,19 @@ public class RelOptHiveTable implements RelOptTable {
/** Note: DOES NOT CHECK txn stats. */
public List<ColStatistics> getColStat(List<Integer> projIndxLst, boolean allowMissingStats) {
List<ColStatistics> colStatsBldr = Lists.newArrayList();
- Set<Integer> projIndxSet = new HashSet<Integer>(projIndxLst);
- if (projIndxLst != null) {
- for (Integer i : projIndxLst) {
- if (i >= noOfNonVirtualCols) {
- projIndxSet.remove(i);
- } else if (hiveColStatsMap.get(i) != null) {
- colStatsBldr.add(hiveColStatsMap.get(i));
- projIndxSet.remove(i);
- }
- }
- if (!projIndxSet.isEmpty()) {
- updateColStats(projIndxSet, allowMissingStats);
- for (Integer i : projIndxSet) {
- colStatsBldr.add(hiveColStatsMap.get(i));
- }
- }
- } else {
- List<Integer> pILst = new ArrayList<Integer>();
- for (Integer i = 0; i < noOfNonVirtualCols; i++) {
- if (hiveColStatsMap.get(i) == null) {
- pILst.add(i);
- }
+ Set<Integer> projIndxSet = new HashSet<>(projIndxLst);
+ for (Integer i : projIndxLst) {
+ if (i >= noOfNonVirtualCols) {
+ projIndxSet.remove(i);
+ } else if (hiveColStatsMap.get(i) != null) {
+ colStatsBldr.add(hiveColStatsMap.get(i));
+ projIndxSet.remove(i);
}
- if (!pILst.isEmpty()) {
- updateColStats(new HashSet<Integer>(pILst), allowMissingStats);
- for (Integer pi : pILst) {
- colStatsBldr.add(hiveColStatsMap.get(pi));
- }
+ }
+ if (!projIndxSet.isEmpty()) {
+ updateColStats(projIndxSet, allowMissingStats);
+ for (Integer i : projIndxSet) {
+ colStatsBldr.add(hiveColStatsMap.get(i));
}
}
@@ -718,7 +703,6 @@ public class RelOptHiveTable implements RelOptTable {
* all columns in BitSet are partition columns.
*/
public boolean containsPartitionColumnsOnly(ImmutableBitSet cols) {
-
for (int i = cols.nextSetBit(0); i >= 0; i++, i = cols.nextSetBit(i + 1)) {
if (!hivePartitionColsMap.containsKey(i)) {
return false;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java
index d64db03..b045245 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java
@@ -268,7 +268,9 @@ public class HiveTableScan extends TableScan implements HiveRelNode {
// Also include partition list key to trigger cost evaluation even if an
// expression was already generated.
public String computeDigest() {
- String digest = super.computeDigest() + "[" + this.isInsideView() + "]";
+ String digest = super.computeDigest() +
+ "[" + this.neededColIndxsFrmReloptHT + "]" +
+ "[" + this.isInsideView() + "]";
String partitionListKey = ((RelOptHiveTable) table).getPartitionListKey();
if (partitionListKey != null) {
return digest + "[" + partitionListKey + "]";
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java
index ed6659c..b9409cd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java
@@ -24,6 +24,7 @@ import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
+import java.util.concurrent.atomic.AtomicInteger;
import org.apache.calcite.linq4j.Ord;
import org.apache.calcite.plan.RelOptCost;
import org.apache.calcite.plan.RelOptRuleCall;
@@ -55,6 +56,8 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* Planner rule that pushes an
@@ -63,236 +66,241 @@ import com.google.common.collect.Lists;
*/
public class HiveAggregateJoinTransposeRule extends AggregateJoinTransposeRule {
- /** Extended instance of the rule that can push down aggregate functions. */
- public static final HiveAggregateJoinTransposeRule INSTANCE =
- new HiveAggregateJoinTransposeRule(HiveAggregate.class, HiveJoin.class,
- HiveRelFactories.HIVE_BUILDER, true);
+ private static final Logger LOG = LoggerFactory.getLogger(HiveAggregateJoinTransposeRule.class);
private final boolean allowFunctions;
+ private final AtomicInteger noColsMissingStats;
/** Creates an AggregateJoinTransposeRule that may push down functions. */
- private HiveAggregateJoinTransposeRule(Class<? extends Aggregate> aggregateClass,
- Class<? extends Join> joinClass,
- RelBuilderFactory relBuilderFactory,
- boolean allowFunctions) {
- super(aggregateClass, joinClass, relBuilderFactory, true);
- this.allowFunctions = allowFunctions;
+ public HiveAggregateJoinTransposeRule(AtomicInteger noColsMissingStats) {
+ super(HiveAggregate.class, HiveJoin.class, HiveRelFactories.HIVE_BUILDER, true);
+ this.allowFunctions = true;
+ this.noColsMissingStats = noColsMissingStats;
}
@Override
public void onMatch(RelOptRuleCall call) {
- final Aggregate aggregate = call.rel(0);
- final Join join = call.rel(1);
- final RexBuilder rexBuilder = aggregate.getCluster().getRexBuilder();
- final RelBuilder relBuilder = call.builder();
+ try {
+ final Aggregate aggregate = call.rel(0);
+ final Join join = call.rel(1);
+ final RexBuilder rexBuilder = aggregate.getCluster().getRexBuilder();
+ final RelBuilder relBuilder = call.builder();
- // If any aggregate functions do not support splitting, bail out
- // If any aggregate call has a filter, bail out
- for (AggregateCall aggregateCall : aggregate.getAggCallList()) {
- if (aggregateCall.getAggregation().unwrap(SqlSplittableAggFunction.class)
- == null) {
- return;
+ // If any aggregate functions do not support splitting, bail out
+ // If any aggregate call has a filter, bail out
+ for (AggregateCall aggregateCall : aggregate.getAggCallList()) {
+ if (aggregateCall.getAggregation().unwrap(SqlSplittableAggFunction.class)
+ == null) {
+ return;
+ }
+ if (aggregateCall.filterArg >= 0) {
+ return;
+ }
}
- if (aggregateCall.filterArg >= 0) {
+
+ // If it is not an inner join, we do not push the
+ // aggregate operator
+ if (join.getJoinType() != JoinRelType.INNER) {
return;
}
- }
- // If it is not an inner join, we do not push the
- // aggregate operator
- if (join.getJoinType() != JoinRelType.INNER) {
- return;
- }
-
- if (!allowFunctions && !aggregate.getAggCallList().isEmpty()) {
- return;
- }
+ if (!allowFunctions && !aggregate.getAggCallList().isEmpty()) {
+ return;
+ }
- // Do the columns used by the join appear in the output of the aggregate?
- final ImmutableBitSet aggregateColumns = aggregate.getGroupSet();
- final RelMetadataQuery mq = call.getMetadataQuery();
- final ImmutableBitSet keyColumns = keyColumns(aggregateColumns,
- mq.getPulledUpPredicates(join).pulledUpPredicates);
- final ImmutableBitSet joinColumns =
- RelOptUtil.InputFinder.bits(join.getCondition());
- final boolean allColumnsInAggregate =
- keyColumns.contains(joinColumns);
- final ImmutableBitSet belowAggregateColumns =
- aggregateColumns.union(joinColumns);
+ // Do the columns used by the join appear in the output of the aggregate?
+ final ImmutableBitSet aggregateColumns = aggregate.getGroupSet();
+ final RelMetadataQuery mq = call.getMetadataQuery();
+ final ImmutableBitSet keyColumns = keyColumns(aggregateColumns,
+ mq.getPulledUpPredicates(join).pulledUpPredicates);
+ final ImmutableBitSet joinColumns =
+ RelOptUtil.InputFinder.bits(join.getCondition());
+ final boolean allColumnsInAggregate =
+ keyColumns.contains(joinColumns);
+ final ImmutableBitSet belowAggregateColumns =
+ aggregateColumns.union(joinColumns);
- // Split join condition
- final List<Integer> leftKeys = Lists.newArrayList();
- final List<Integer> rightKeys = Lists.newArrayList();
- final List<Boolean> filterNulls = Lists.newArrayList();
- RexNode nonEquiConj =
- RelOptUtil.splitJoinCondition(join.getLeft(), join.getRight(),
- join.getCondition(), leftKeys, rightKeys, filterNulls);
- // If it contains non-equi join conditions, we bail out
- if (!nonEquiConj.isAlwaysTrue()) {
- return;
- }
-
- // Push each aggregate function down to each side that contains all of its
- // arguments. Note that COUNT(*), because it has no arguments, can go to
- // both sides.
- final Map<Integer, Integer> map = new HashMap<>();
- final List<Side> sides = new ArrayList<>();
- int uniqueCount = 0;
- int offset = 0;
- int belowOffset = 0;
- for (int s = 0; s < 2; s++) {
- final Side side = new Side();
- final RelNode joinInput = join.getInput(s);
- int fieldCount = joinInput.getRowType().getFieldCount();
- final ImmutableBitSet fieldSet =
- ImmutableBitSet.range(offset, offset + fieldCount);
- final ImmutableBitSet belowAggregateKeyNotShifted =
- belowAggregateColumns.intersect(fieldSet);
- for (Ord<Integer> c : Ord.zip(belowAggregateKeyNotShifted)) {
- map.put(c.e, belowOffset + c.i);
- }
- final ImmutableBitSet belowAggregateKey =
- belowAggregateKeyNotShifted.shift(-offset);
- final boolean unique;
- if (!allowFunctions) {
- assert aggregate.getAggCallList().isEmpty();
- // If there are no functions, it doesn't matter as much whether we
- // aggregate the inputs before the join, because there will not be
- // any functions experiencing a cartesian product effect.
- //
- // But finding out whether the input is already unique requires a call
- // to areColumnsUnique that currently (until [CALCITE-1048] "Make
- // metadata more robust" is fixed) places a heavy load on
- // the metadata system.
- //
- // So we choose to imagine the the input is already unique, which is
- // untrue but harmless.
- //
- unique = true;
- } else {
- final Boolean unique0 =
- mq.areColumnsUnique(joinInput, belowAggregateKey, true);
- unique = unique0 != null && unique0;
+ // Split join condition
+ final List<Integer> leftKeys = Lists.newArrayList();
+ final List<Integer> rightKeys = Lists.newArrayList();
+ final List<Boolean> filterNulls = Lists.newArrayList();
+ RexNode nonEquiConj =
+ RelOptUtil.splitJoinCondition(join.getLeft(), join.getRight(),
+ join.getCondition(), leftKeys, rightKeys, filterNulls);
+ // If it contains non-equi join conditions, we bail out
+ if (!nonEquiConj.isAlwaysTrue()) {
+ return;
}
- if (unique) {
- ++uniqueCount;
- side.newInput = joinInput;
- } else {
- List<AggregateCall> belowAggCalls = new ArrayList<>();
- final SqlSplittableAggFunction.Registry<AggregateCall>
- belowAggCallRegistry = registry(belowAggCalls);
- final Mappings.TargetMapping mapping =
- s == 0
- ? Mappings.createIdentity(fieldCount)
- : Mappings.createShiftMapping(fieldCount + offset, 0, offset,
- fieldCount);
- for (Ord<AggregateCall> aggCall : Ord.zip(aggregate.getAggCallList())) {
- final SqlAggFunction aggregation = aggCall.e.getAggregation();
- final SqlSplittableAggFunction splitter =
- Preconditions.checkNotNull(
- aggregation.unwrap(SqlSplittableAggFunction.class));
- final AggregateCall call1;
- if (fieldSet.contains(ImmutableBitSet.of(aggCall.e.getArgList()))) {
- call1 = splitter.split(aggCall.e, mapping);
- } else {
- call1 = splitter.other(rexBuilder.getTypeFactory(), aggCall.e);
- }
- if (call1 != null) {
- side.split.put(aggCall.i,
- belowAggregateKey.cardinality()
- + belowAggCallRegistry.register(call1));
+
+ // Push each aggregate function down to each side that contains all of its
+ // arguments. Note that COUNT(*), because it has no arguments, can go to
+ // both sides.
+ final Map<Integer, Integer> map = new HashMap<>();
+ final List<Side> sides = new ArrayList<>();
+ int uniqueCount = 0;
+ int offset = 0;
+ int belowOffset = 0;
+ for (int s = 0; s < 2; s++) {
+ final Side side = new Side();
+ final RelNode joinInput = join.getInput(s);
+ int fieldCount = joinInput.getRowType().getFieldCount();
+ final ImmutableBitSet fieldSet =
+ ImmutableBitSet.range(offset, offset + fieldCount);
+ final ImmutableBitSet belowAggregateKeyNotShifted =
+ belowAggregateColumns.intersect(fieldSet);
+ for (Ord<Integer> c : Ord.zip(belowAggregateKeyNotShifted)) {
+ map.put(c.e, belowOffset + c.i);
+ }
+ final ImmutableBitSet belowAggregateKey =
+ belowAggregateKeyNotShifted.shift(-offset);
+ final boolean unique;
+ if (!allowFunctions) {
+ assert aggregate.getAggCallList().isEmpty();
+ // If there are no functions, it doesn't matter as much whether we
+ // aggregate the inputs before the join, because there will not be
+ // any functions experiencing a cartesian product effect.
+ //
+ // But finding out whether the input is already unique requires a call
+ // to areColumnsUnique that currently (until [CALCITE-1048] "Make
+ // metadata more robust" is fixed) places a heavy load on
+ // the metadata system.
+ //
+ // So we choose to imagine the the input is already unique, which is
+ // untrue but harmless.
+ //
+ unique = true;
+ } else {
+ final Boolean unique0 =
+ mq.areColumnsUnique(joinInput, belowAggregateKey, true);
+ unique = unique0 != null && unique0;
+ }
+ if (unique) {
+ ++uniqueCount;
+ side.newInput = joinInput;
+ } else {
+ List<AggregateCall> belowAggCalls = new ArrayList<>();
+ final SqlSplittableAggFunction.Registry<AggregateCall>
+ belowAggCallRegistry = registry(belowAggCalls);
+ final Mappings.TargetMapping mapping =
+ s == 0
+ ? Mappings.createIdentity(fieldCount)
+ : Mappings.createShiftMapping(fieldCount + offset, 0, offset,
+ fieldCount);
+ for (Ord<AggregateCall> aggCall : Ord.zip(aggregate.getAggCallList())) {
+ final SqlAggFunction aggregation = aggCall.e.getAggregation();
+ final SqlSplittableAggFunction splitter =
+ Preconditions.checkNotNull(
+ aggregation.unwrap(SqlSplittableAggFunction.class));
+ final AggregateCall call1;
+ if (fieldSet.contains(ImmutableBitSet.of(aggCall.e.getArgList()))) {
+ call1 = splitter.split(aggCall.e, mapping);
+ } else {
+ call1 = splitter.other(rexBuilder.getTypeFactory(), aggCall.e);
+ }
+ if (call1 != null) {
+ side.split.put(aggCall.i,
+ belowAggregateKey.cardinality()
+ + belowAggCallRegistry.register(call1));
+ }
}
+ side.newInput = relBuilder.push(joinInput)
+ .aggregate(relBuilder.groupKey(belowAggregateKey, null),
+ belowAggCalls)
+ .build();
}
- side.newInput = relBuilder.push(joinInput)
- .aggregate(relBuilder.groupKey(belowAggregateKey, null),
- belowAggCalls)
- .build();
+ offset += fieldCount;
+ belowOffset += side.newInput.getRowType().getFieldCount();
+ sides.add(side);
}
- offset += fieldCount;
- belowOffset += side.newInput.getRowType().getFieldCount();
- sides.add(side);
- }
- if (uniqueCount == 2) {
- // Both inputs to the join are unique. There is nothing to be gained by
- // this rule. In fact, this aggregate+join may be the result of a previous
- // invocation of this rule; if we continue we might loop forever.
- return;
- }
+ if (uniqueCount == 2) {
+ // Both inputs to the join are unique. There is nothing to be gained by
+ // this rule. In fact, this aggregate+join may be the result of a previous
+ // invocation of this rule; if we continue we might loop forever.
+ return;
+ }
- // Update condition
- final Mapping mapping = (Mapping) Mappings.target(
- map::get,
- join.getRowType().getFieldCount(),
- belowOffset);
- final RexNode newCondition =
- RexUtil.apply(mapping, join.getCondition());
+ // Update condition
+ final Mapping mapping = (Mapping) Mappings.target(
+ map::get,
+ join.getRowType().getFieldCount(),
+ belowOffset);
+ final RexNode newCondition =
+ RexUtil.apply(mapping, join.getCondition());
- // Create new join
- relBuilder.push(sides.get(0).newInput)
- .push(sides.get(1).newInput)
- .join(join.getJoinType(), newCondition);
+ // Create new join
+ relBuilder.push(sides.get(0).newInput)
+ .push(sides.get(1).newInput)
+ .join(join.getJoinType(), newCondition);
- // Aggregate above to sum up the sub-totals
- final List<AggregateCall> newAggCalls = new ArrayList<>();
- final int groupIndicatorCount =
- aggregate.getGroupCount() + aggregate.getIndicatorCount();
- final int newLeftWidth = sides.get(0).newInput.getRowType().getFieldCount();
- final List<RexNode> projects =
- new ArrayList<>(
- rexBuilder.identityProjects(relBuilder.peek().getRowType()));
- for (Ord<AggregateCall> aggCall : Ord.zip(aggregate.getAggCallList())) {
- final SqlAggFunction aggregation = aggCall.e.getAggregation();
- final SqlSplittableAggFunction splitter =
- Preconditions.checkNotNull(
- aggregation.unwrap(SqlSplittableAggFunction.class));
- final Integer leftSubTotal = sides.get(0).split.get(aggCall.i);
- final Integer rightSubTotal = sides.get(1).split.get(aggCall.i);
- newAggCalls.add(
- splitter.topSplit(rexBuilder, registry(projects),
- groupIndicatorCount, relBuilder.peek().getRowType(), aggCall.e,
- leftSubTotal == null ? -1 : leftSubTotal,
- rightSubTotal == null ? -1 : rightSubTotal + newLeftWidth));
- }
+ // Aggregate above to sum up the sub-totals
+ final List<AggregateCall> newAggCalls = new ArrayList<>();
+ final int groupIndicatorCount =
+ aggregate.getGroupCount() + aggregate.getIndicatorCount();
+ final int newLeftWidth = sides.get(0).newInput.getRowType().getFieldCount();
+ final List<RexNode> projects =
+ new ArrayList<>(
+ rexBuilder.identityProjects(relBuilder.peek().getRowType()));
+ for (Ord<AggregateCall> aggCall : Ord.zip(aggregate.getAggCallList())) {
+ final SqlAggFunction aggregation = aggCall.e.getAggregation();
+ final SqlSplittableAggFunction splitter =
+ Preconditions.checkNotNull(
+ aggregation.unwrap(SqlSplittableAggFunction.class));
+ final Integer leftSubTotal = sides.get(0).split.get(aggCall.i);
+ final Integer rightSubTotal = sides.get(1).split.get(aggCall.i);
+ newAggCalls.add(
+ splitter.topSplit(rexBuilder, registry(projects),
+ groupIndicatorCount, relBuilder.peek().getRowType(), aggCall.e,
+ leftSubTotal == null ? -1 : leftSubTotal,
+ rightSubTotal == null ? -1 : rightSubTotal + newLeftWidth));
+ }
- relBuilder.project(projects);
+ relBuilder.project(projects);
- boolean aggConvertedToProjects = false;
- if (allColumnsInAggregate) {
- // let's see if we can convert aggregate into projects
- List<RexNode> projects2 = new ArrayList<>();
- for (int key : Mappings.apply(mapping, aggregate.getGroupSet())) {
- projects2.add(relBuilder.field(key));
- }
- for (AggregateCall newAggCall : newAggCalls) {
- final SqlSplittableAggFunction splitter =
- newAggCall.getAggregation().unwrap(SqlSplittableAggFunction.class);
- if (splitter != null) {
- final RelDataType rowType = relBuilder.peek().getRowType();
- projects2.add(splitter.singleton(rexBuilder, rowType, newAggCall));
+ boolean aggConvertedToProjects = false;
+ if (allColumnsInAggregate) {
+ // let's see if we can convert aggregate into projects
+ List<RexNode> projects2 = new ArrayList<>();
+ for (int key : Mappings.apply(mapping, aggregate.getGroupSet())) {
+ projects2.add(relBuilder.field(key));
+ }
+ for (AggregateCall newAggCall : newAggCalls) {
+ final SqlSplittableAggFunction splitter =
+ newAggCall.getAggregation().unwrap(SqlSplittableAggFunction.class);
+ if (splitter != null) {
+ final RelDataType rowType = relBuilder.peek().getRowType();
+ projects2.add(splitter.singleton(rexBuilder, rowType, newAggCall));
+ }
+ }
+ if (projects2.size()
+ == aggregate.getGroupSet().cardinality() + newAggCalls.size()) {
+ // We successfully converted agg calls into projects.
+ relBuilder.project(projects2);
+ aggConvertedToProjects = true;
}
}
- if (projects2.size()
- == aggregate.getGroupSet().cardinality() + newAggCalls.size()) {
- // We successfully converted agg calls into projects.
- relBuilder.project(projects2);
- aggConvertedToProjects = true;
- }
- }
- if (!aggConvertedToProjects) {
- relBuilder.aggregate(
- relBuilder.groupKey(Mappings.apply(mapping, aggregate.getGroupSet()),
- Mappings.apply2(mapping, aggregate.getGroupSets())),
- newAggCalls);
- }
+ if (!aggConvertedToProjects) {
+ relBuilder.aggregate(
+ relBuilder.groupKey(Mappings.apply(mapping, aggregate.getGroupSet()),
+ Mappings.apply2(mapping, aggregate.getGroupSets())),
+ newAggCalls);
+ }
- // Make a cost based decision to pick cheaper plan
- RelNode r = relBuilder.build();
- RelOptCost afterCost = mq.getCumulativeCost(r);
- RelOptCost beforeCost = mq.getCumulativeCost(aggregate);
- if (afterCost.isLt(beforeCost)) {
- call.transformTo(r);
+ // Make a cost based decision to pick cheaper plan
+ RelNode r = relBuilder.build();
+ RelOptCost afterCost = mq.getCumulativeCost(r);
+ RelOptCost beforeCost = mq.getCumulativeCost(aggregate);
+ if (afterCost.isLt(beforeCost)) {
+ call.transformTo(r);
+ }
+ } catch (Exception e) {
+ if (noColsMissingStats.get() > 0) {
+ LOG.warn("Missing column stats (see previous messages), skipping aggregate-join transpose in CBO");
+ noColsMissingStats.set(0);
+ } else {
+ throw e;
+ }
}
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFieldTrimmerRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFieldTrimmerRule.java
new file mode 100644
index 0000000..ac050df
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFieldTrimmerRule.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.hep.HepPlanner;
+import org.apache.calcite.plan.hep.HepProgram;
+import org.apache.calcite.plan.hep.HepProgramBuilder;
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.rel.RelNode;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
+
+
+/**
+ * Rule that triggers the field trimmer on the root of a plan.
+ */
+public class HiveFieldTrimmerRule extends RelOptRule {
+
+ private static final HepProgram PROGRAM = new HepProgramBuilder()
+ .addRuleInstance(HiveHepExtractRelNodeRule.INSTANCE)
+ .build();
+
+ private final boolean fetchStats;
+ private boolean triggered;
+
+ public HiveFieldTrimmerRule(boolean fetchStats) {
+ super(operand(RelNode.class, any()),
+ HiveRelFactories.HIVE_BUILDER, "HiveFieldTrimmerRule");
+ this.fetchStats = fetchStats;
+ triggered = false;
+ }
+
+ @Override
+ public void onMatch(RelOptRuleCall call) {
+ if (triggered) {
+ // Bail out
+ return;
+ }
+
+ RelNode node = call.rel(0);
+ final HepRelVertex root = (HepRelVertex) call.getPlanner().getRoot();
+ if (root.getCurrentRel() != node) {
+ // Bail out
+ return;
+ }
+ // The node is the root, release the kraken!
+ final HepPlanner tmpPlanner = new HepPlanner(PROGRAM);
+ tmpPlanner.setRoot(node);
+ node = tmpPlanner.findBestExp();
+ final HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null,
+ relBuilderFactory.create(node.getCluster(), null), fetchStats);
+ call.transformTo(fieldTrimmer.trim(node));
+ triggered = true;
+ }
+
+
+ /**
+ * The goal of this rule is to extract the RelNode from the
+ * HepRelVertex node so the trimmer can be applied correctly.
+ */
+ private static class HiveHepExtractRelNodeRule extends RelOptRule {
+
+ private static final HiveHepExtractRelNodeRule INSTANCE =
+ new HiveHepExtractRelNodeRule();
+
+ private HiveHepExtractRelNodeRule() {
+ super(operand(HepRelVertex.class, any()));
+ }
+
+ @Override
+ public void onMatch(RelOptRuleCall call) {
+ final HepRelVertex rel = call.rel(0);
+ call.transformTo(rel.getCurrentRel());
+ }
+ }
+
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSortPredicates.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSortPredicates.java
index df75e93..3b35570 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSortPredicates.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSortPredicates.java
@@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
import java.util.Comparator;
import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.plan.RelOptRuleCall;
@@ -35,6 +36,8 @@ import org.apache.calcite.rex.RexVisitorImpl;
import org.apache.calcite.util.Pair;
import org.apache.hadoop.hive.ql.optimizer.calcite.stats.FilterSelectivityEstimator;
import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdSize;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
@@ -44,13 +47,15 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdSize;
*/
public class HiveFilterSortPredicates extends RelOptRule {
- public static final HiveFilterSortPredicates INSTANCE = new HiveFilterSortPredicates();
+ private static final Logger LOG = LoggerFactory.getLogger(HiveFilterSortPredicates.class);
+ private final AtomicInteger noColsMissingStats;
- private HiveFilterSortPredicates() {
+ public HiveFilterSortPredicates(AtomicInteger noColsMissingStats) {
super(
operand(Filter.class,
operand(RelNode.class, any())));
+ this.noColsMissingStats = noColsMissingStats;
}
@Override
@@ -70,31 +75,41 @@ public class HiveFilterSortPredicates extends RelOptRule {
@Override
public void onMatch(RelOptRuleCall call) {
- final Filter filter = call.rel(0);
- final RelNode input = call.rel(1);
+ try {
+ final Filter filter = call.rel(0);
+ final RelNode input = call.rel(1);
+
+ // Register that we have visited this operator in this rule
+ HiveRulesRegistry registry = call.getPlanner().getContext().unwrap(HiveRulesRegistry.class);
+ if (registry != null) {
+ registry.registerVisited(this, filter);
+ }
- // Register that we have visited this operator in this rule
- HiveRulesRegistry registry = call.getPlanner().getContext().unwrap(HiveRulesRegistry.class);
- if (registry != null) {
- registry.registerVisited(this, filter);
- }
+ final RexNode originalCond = filter.getCondition();
+ final RexSortPredicatesShuttle sortPredicatesShuttle = new RexSortPredicatesShuttle(
+ input, filter.getCluster().getMetadataQuery());
+ final RexNode newCond = originalCond.accept(sortPredicatesShuttle);
+ if (!sortPredicatesShuttle.modified) {
+ // We are done, bail out
+ return;
+ }
- final RexNode originalCond = filter.getCondition();
- RexSortPredicatesShuttle sortPredicatesShuttle = new RexSortPredicatesShuttle(
- input, filter.getCluster().getMetadataQuery());
- final RexNode newCond = originalCond.accept(sortPredicatesShuttle);
- if (!sortPredicatesShuttle.modified) {
- // We are done, bail out
- return;
- }
+ // We register the new filter so we do not fire the rule on it again
+ final Filter newFilter = filter.copy(filter.getTraitSet(), input, newCond);
+ if (registry != null) {
+ registry.registerVisited(this, newFilter);
+ }
- // We register the new filter so we do not fire the rule on it again
- final Filter newFilter = filter.copy(filter.getTraitSet(), input, newCond);
- if (registry != null) {
- registry.registerVisited(this, newFilter);
+ call.transformTo(newFilter);
+ }
+ catch (Exception e) {
+ if (noColsMissingStats.get() > 0) {
+ LOG.warn("Missing column stats (see previous messages), skipping sort predicates in filter expressions in CBO");
+ noColsMissingStats.set(0);
+ } else {
+ throw e;
+ }
}
-
- call.transformTo(newFilter);
}
/**
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/EstimateUniqueKeys.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/EstimateUniqueKeys.java
index 4aba098..e36259c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/EstimateUniqueKeys.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/EstimateUniqueKeys.java
@@ -95,8 +95,7 @@ public final class EstimateUniqueKeys {
RelMetadataQuery mq = rel.getCluster().getMetadataQuery();
double numRows = mq.getRowCount(tScan);
- List<ColStatistics> colStats = tScan.getColStat(BitSets
- .toList(projectedCols));
+ List<ColStatistics> colStats = tScan.getColStat(BitSets.toList(projectedCols));
Set<ImmutableBitSet> keys = new HashSet<ImmutableBitSet>();
colStatsPos = 0;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java
index 893cb99..75a903c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java
@@ -59,8 +59,9 @@ public class HiveRelMdSize extends RelMdSize {
// Obtain list of col stats, or use default if they are not available
final ImmutableList.Builder<Double> list = ImmutableList.builder();
int indxRqdCol = 0;
+ int nNoVirtualColumns = ((RelOptHiveTable) scan.getTable()).getNoOfNonVirtualCols();
int nFields = scan.getRowType().getFieldCount();
- for (int i = 0; i < nFields; i++) {
+ for (int i = 0; i < nNoVirtualColumns; i++) {
if (neededcolsLst.contains(i)) {
ColStatistics columnStatistic = columnStatistics.get(indxRqdCol);
indxRqdCol++;
@@ -74,6 +75,14 @@ public class HiveRelMdSize extends RelMdSize {
list.add(Double.valueOf(0));
}
}
+ for (int i = nNoVirtualColumns; i < nFields; i++) {
+ if (neededcolsLst.contains(i)) {
+ RelDataTypeField field = scan.getRowType().getFieldList().get(i);
+ list.add(averageTypeValueSize(field.getType()));
+ } else {
+ list.add(Double.valueOf(0));
+ }
+ }
return list.build();
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index ef2ebac..b514aa1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -167,6 +167,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateSplitRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveDruidRules;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveExceptRewriteRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveExpandDistinctAggregatesRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFieldTrimmerRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterAggregateTransposeRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterJoinRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterProjectTSTransposeRule;
@@ -1810,7 +1811,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
LOG.debug("Plan before removing subquery:\n" + RelOptUtil.toString(calciteGenPlan));
}
calciteGenPlan = hepPlan(calciteGenPlan, false, mdProvider.getMetadataProvider(), null,
- new HiveSubQueryRemoveRule(conf));
+ HepMatchOrder.DEPTH_FIRST, new HiveSubQueryRemoveRule(conf));
if (LOG.isDebugEnabled()) {
LOG.debug("Plan just after removing subquery:\n" + RelOptUtil.toString(calciteGenPlan));
}
@@ -1835,7 +1836,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
// 2. Apply pre-join order optimizations
calcitePreCboPlan = applyPreJoinOrderingTransforms(calciteGenPlan,
- mdProvider.getMetadataProvider(), executorProvider);
+ mdProvider.getMetadataProvider(), executorProvider);
// 3. Materialized view based rewriting
// We disable it for CTAS and MV creation queries (trying to avoid any problem
@@ -1846,210 +1847,20 @@ public class CalcitePlanner extends SemanticAnalyzer {
calcitePreCboPlan, mdProvider.getMetadataProvider(), executorProvider);
}
- // Get rid of sq_count_check if group by key is constant
- if (conf.getBoolVar(ConfVars.HIVE_REMOVE_SQ_COUNT_CHECK)) {
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- calcitePreCboPlan =
- hepPlan(calcitePreCboPlan, false, mdProvider.getMetadataProvider(), null,
- HiveRemoveSqCountCheck.INSTANCE);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Removing sq_count_check UDF ");
- }
-
-
// 4. Apply join order optimizations: reordering MST algorithm
// If join optimizations failed because of missing stats, we continue with
// the rest of optimizations
if (profilesCBO.contains(ExtendedCBOProfile.JOIN_REORDERING)) {
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
-
- // Remove Projects between Joins so that JoinToMultiJoinRule can merge them to MultiJoin
- calcitePreCboPlan = hepPlan(calcitePreCboPlan, true, mdProvider.getMetadataProvider(), executorProvider,
- HepMatchOrder.BOTTOM_UP, HiveJoinProjectTransposeRule.LEFT_PROJECT_BTW_JOIN,
- HiveJoinProjectTransposeRule.RIGHT_PROJECT_BTW_JOIN, HiveProjectMergeRule.INSTANCE);
- try {
- List<RelMetadataProvider> list = Lists.newArrayList();
- list.add(mdProvider.getMetadataProvider());
- RelTraitSet desiredTraits = optCluster
- .traitSetOf(HiveRelNode.CONVENTION, RelCollations.EMPTY);
-
- HepProgramBuilder hepPgmBldr = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP);
- hepPgmBldr.addRuleInstance(new JoinToMultiJoinRule(HiveJoin.class));
- hepPgmBldr.addRuleInstance(new LoptOptimizeJoinRule(HiveRelFactories.HIVE_BUILDER));
-
- HepProgram hepPgm = hepPgmBldr.build();
- HepPlanner hepPlanner = new HepPlanner(hepPgm);
-
- hepPlanner.registerMetadataProviders(list);
- RelMetadataProvider chainedProvider = ChainedRelMetadataProvider.of(list);
- optCluster.setMetadataProvider(new CachingRelMetadataProvider(chainedProvider, hepPlanner));
-
- RelNode rootRel = calcitePreCboPlan;
- hepPlanner.setRoot(rootRel);
- if (!calcitePreCboPlan.getTraitSet().equals(desiredTraits)) {
- rootRel = hepPlanner.changeTraits(calcitePreCboPlan, desiredTraits);
- }
- hepPlanner.setRoot(rootRel);
-
- calciteOptimizedPlan = hepPlanner.findBestExp();
- } catch (Exception e) {
- boolean isMissingStats = noColsMissingStats.get() > 0;
- if (isMissingStats) {
- LOG.warn("Missing column stats (see previous messages), skipping join reordering in CBO");
- noColsMissingStats.set(0);
- calciteOptimizedPlan = calcitePreCboPlan;
- disableSemJoinReordering = false;
- } else {
- throw e;
- }
- }
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Join Reordering");
+ calciteOptimizedPlan = applyJoinOrderingTransform(calcitePreCboPlan,
+ mdProvider.getMetadataProvider(), executorProvider);
} else {
calciteOptimizedPlan = calcitePreCboPlan;
disableSemJoinReordering = false;
}
- // 5. Run other optimizations that do not need stats
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null,
- HepMatchOrder.BOTTOM_UP, ProjectRemoveRule.INSTANCE, HiveUnionMergeRule.INSTANCE,
- HiveAggregateProjectMergeRule.INSTANCE, HiveProjectMergeRule.INSTANCE_NO_FORCE, HiveJoinCommuteRule.INSTANCE);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Optimizations without stats 1");
-
- // 6. Run aggregate-join transpose (cost based)
- // If it failed because of missing stats, we continue with
- // the rest of optimizations
- if (conf.getBoolVar(ConfVars.AGGR_JOIN_TRANSPOSE)) {
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- try {
- calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null,
- HepMatchOrder.BOTTOM_UP, HiveAggregateJoinTransposeRule.INSTANCE);
- } catch (Exception e) {
- boolean isMissingStats = noColsMissingStats.get() > 0;
- if (isMissingStats) {
- LOG.warn("Missing column stats (see previous messages), skipping aggregate-join transpose in CBO");
- noColsMissingStats.set(0);
- } else {
- throw e;
- }
- }
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Aggregate join transpose");
- }
-
- // 7.convert Join + GBy to semijoin
- // run this rule at later stages, since many calcite rules cant deal with semijoin
- if (conf.getBoolVar(ConfVars.SEMIJOIN_CONVERSION)) {
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null,
- HiveSemiJoinRule.INSTANCE_PROJECT, HiveSemiJoinRule.INSTANCE_PROJECT_SWAPPED, HiveSemiJoinRule.INSTANCE_AGGREGATE);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Semijoin conversion");
- }
-
- // 8. convert SemiJoin + GBy to SemiJoin
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null,
- HiveRemoveGBYSemiJoinRule.INSTANCE);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Removal of gby from semijoin");
-
- // 9. Run rule to fix windowing issue when it is done over
- // aggregation columns (HIVE-10627)
- if (profilesCBO.contains(ExtendedCBOProfile.WINDOWING_POSTPROCESSING)) {
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null,
- HepMatchOrder.BOTTOM_UP, HiveWindowingFixRule.INSTANCE);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Window fixing rule");
- }
-
- // 10. Sort predicates in filter expressions
- if (conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_SORT_PREDS_WITH_STATS)) {
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- try {
- calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null,
- HepMatchOrder.BOTTOM_UP, HiveFilterSortPredicates.INSTANCE);
- } catch (Exception e) {
- boolean isMissingStats = noColsMissingStats.get() > 0;
- if (isMissingStats) {
- LOG.warn("Missing column stats (see previous messages), " +
- "skipping sort predicates in filter expressions in CBO");
- noColsMissingStats.set(0);
- } else {
- throw e;
- }
- }
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
- "Calcite: Sort predicates within filter operators");
- }
-
- // 11. Apply Druid and JDBC transformation rules
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null,
- HepMatchOrder.BOTTOM_UP,
- HiveDruidRules.FILTER_DATE_RANGE_RULE,
- HiveDruidRules.FILTER, HiveDruidRules.PROJECT_FILTER_TRANSPOSE,
- HiveDruidRules.AGGREGATE_FILTER_TRANSPOSE,
- HiveDruidRules.AGGREGATE_PROJECT,
- HiveDruidRules.PROJECT,
- HiveDruidRules.EXPAND_SINGLE_DISTINCT_AGGREGATES_DRUID_RULE,
- HiveDruidRules.AGGREGATE,
- HiveDruidRules.POST_AGGREGATION_PROJECT,
- HiveDruidRules.FILTER_AGGREGATE_TRANSPOSE,
- HiveDruidRules.FILTER_PROJECT_TRANSPOSE,
- HiveDruidRules.HAVING_FILTER_RULE,
- HiveDruidRules.SORT_PROJECT_TRANSPOSE,
- HiveDruidRules.SORT,
- HiveDruidRules.PROJECT_SORT_TRANSPOSE
- );
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Druid transformation rules");
-
- if (conf.getBoolVar(ConfVars.HIVE_ENABLE_JDBC_PUSHDOWN)) {
- List<RelOptRule> rules = Lists.newArrayList();
- rules.add(JDBCExpandExpressionsRule.FILTER_INSTANCE);
- rules.add(JDBCExpandExpressionsRule.JOIN_INSTANCE);
- rules.add(JDBCExpandExpressionsRule.PROJECT_INSTANCE);
- rules.add(JDBCExtractJoinFilterRule.INSTANCE);
- rules.add(JDBCAbstractSplitFilterRule.SPLIT_FILTER_ABOVE_JOIN);
- rules.add(JDBCAbstractSplitFilterRule.SPLIT_FILTER_ABOVE_CONVERTER);
- rules.add(JDBCFilterJoinRule.INSTANCE);
- rules.add(JDBCFilterPushDownRule.INSTANCE);
- rules.add(JDBCProjectPushDownRule.INSTANCE);
- if (!conf.getBoolVar(ConfVars.HIVE_ENABLE_JDBC_SAFE_PUSHDOWN)) {
- rules.add(JDBCJoinPushDownRule.INSTANCE);
- rules.add(JDBCUnionPushDownRule.INSTANCE);
- rules.add(JDBCAggregationPushDownRule.INSTANCE);
- rules.add(JDBCSortPushDownRule.INSTANCE);
- }
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, true, mdProvider.getMetadataProvider(), null,
- HepMatchOrder.TOP_DOWN, rules.toArray(new RelOptRule[rules.size()]));
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: JDBC transformation rules");
- }
-
- // 12. Run rules to aid in translation from Calcite tree to Hive tree
- if (HiveConf.getBoolVar(conf, ConfVars.HIVE_CBO_RETPATH_HIVEOP)) {
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- // 12.1. Merge join into multijoin operators (if possible)
- calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, true, mdProvider.getMetadataProvider(), null,
- HepMatchOrder.BOTTOM_UP, HiveJoinProjectTransposeRule.BOTH_PROJECT_INCLUDE_OUTER,
- HiveJoinProjectTransposeRule.LEFT_PROJECT_INCLUDE_OUTER,
- HiveJoinProjectTransposeRule.RIGHT_PROJECT_INCLUDE_OUTER,
- HiveJoinToMultiJoinRule.INSTANCE, HiveProjectMergeRule.INSTANCE);
- // The previous rules can pull up projections through join operators,
- // thus we run the field trimmer again to push them back down
- fieldTrimmer = new HiveRelFieldTrimmer(null,
- HiveRelFactories.HIVE_BUILDER.create(optCluster, null));
- calciteOptimizedPlan = fieldTrimmer.trim(calciteOptimizedPlan);
- calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null,
- HepMatchOrder.BOTTOM_UP, ProjectRemoveRule.INSTANCE,
- new ProjectMergeRule(false, HiveRelFactories.HIVE_BUILDER));
- calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, true, mdProvider.getMetadataProvider(), null,
- HiveFilterProjectTSTransposeRule.INSTANCE, HiveFilterProjectTSTransposeRule.INSTANCE_DRUID,
- HiveProjectFilterPullUpConstantsRule.INSTANCE);
-
- // 12.2. Introduce exchange operators below join/multijoin operators
- calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null,
- HepMatchOrder.BOTTOM_UP, HiveInsertExchange4JoinRule.EXCHANGE_BELOW_JOIN,
- HiveInsertExchange4JoinRule.EXCHANGE_BELOW_MULTIJOIN);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Translation from Calcite tree to Hive tree");
- }
+ // 5. Apply post-join order optimizations
+ calciteOptimizedPlan = applyPostJoinOrderingTransform(calciteOptimizedPlan,
+ mdProvider.getMetadataProvider(), executorProvider);
if (LOG.isDebugEnabled() && !conf.getBoolVar(ConfVars.HIVE_IN_TEST)) {
LOG.debug("CBO Planning details:\n");
@@ -2084,37 +1895,26 @@ public class CalcitePlanner extends SemanticAnalyzer {
final int maxCNFNodeCount = conf.getIntVar(HiveConf.ConfVars.HIVE_CBO_CNF_NODES_LIMIT);
final int minNumORClauses = conf.getIntVar(HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZERMIN);
+ final HepProgramBuilder program = new HepProgramBuilder();
+
//0. SetOp rewrite
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- basePlan = hepPlan(basePlan, true, mdProvider, null, HepMatchOrder.BOTTOM_UP,
+ generatePartialProgram(program, true, HepMatchOrder.BOTTOM_UP,
HiveProjectOverIntersectRemoveRule.INSTANCE, HiveIntersectMergeRule.INSTANCE,
HiveUnionMergeRule.INSTANCE);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
- "Calcite: HiveProjectOverIntersectRemoveRule, HiveIntersectMerge and HiveUnionMergeRule rules");
-
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- basePlan = hepPlan(basePlan, false, mdProvider, executorProvider, HepMatchOrder.BOTTOM_UP,
+ generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
HiveIntersectRewriteRule.INSTANCE);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
- "Calcite: HiveIntersectRewrite rule");
-
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- basePlan = hepPlan(basePlan, false, mdProvider, executorProvider, HepMatchOrder.BOTTOM_UP,
+ generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
HiveExceptRewriteRule.INSTANCE);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
- "Calcite: HiveExceptRewrite rule");
//1. Distinct aggregate rewrite
// Run this optimization early, since it is expanding the operator pipeline.
if (!conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("mr") &&
conf.getBoolVar(HiveConf.ConfVars.HIVEOPTIMIZEDISTINCTREWRITE)) {
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
// Its not clear, if this rewrite is always performant on MR, since extra map phase
// introduced for 2nd MR job may offset gains of this multi-stage aggregation.
// We need a cost model for MR to enable this on MR.
- basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, HiveExpandDistinctAggregatesRule.INSTANCE);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
- "Calcite: Prejoin ordering transformation, Distinct aggregate rewrite");
+ generatePartialProgram(program, true, HepMatchOrder.TOP_DOWN,
+ HiveExpandDistinctAggregatesRule.INSTANCE);
}
// 2. Try factoring out common filter elements & separating deterministic
@@ -2122,11 +1922,8 @@ public class CalcitePlanner extends SemanticAnalyzer {
// add on-clauses for old style Join Syntax
// Ex: select * from R1 join R2 where ((R1.x=R2.x) and R1.y<10) or
// ((R1.x=R2.x) and R1.z=10)) and rand(1) < 0.1
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- basePlan = hepPlan(basePlan, false, mdProvider, executorProvider, HepMatchOrder.ARBITRARY,
+ generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
new HivePreFilteringRule(maxCNFNodeCount));
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
- "Calcite: Prejoin ordering transformation, factor out common filter elements and separating deterministic vs non-deterministic UDF");
// 3. Run exhaustive PPD, add not null filters, transitive inference,
// constant propagation, constant folding
@@ -2170,35 +1967,29 @@ public class CalcitePlanner extends SemanticAnalyzer {
rules.add(HiveSortLimitPullUpConstantsRule.INSTANCE);
rules.add(HiveUnionPullUpConstantsRule.INSTANCE);
rules.add(HiveAggregatePullUpConstantsRule.INSTANCE);
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, HepMatchOrder.BOTTOM_UP,
- rules.toArray(new RelOptRule[rules.size()]));
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
- "Calcite: Prejoin ordering transformation, PPD, not null predicates, transitive inference, constant folding");
+ generatePartialProgram(program, true, HepMatchOrder.BOTTOM_UP,
+ rules.toArray(new RelOptRule[rules.size()]));
// 4. Push down limit through outer join
// NOTE: We run this after PPD to support old style join syntax.
// Ex: select * from R1 left outer join R2 where ((R1.x=R2.x) and R1.y<10) or
// ((R1.x=R2.x) and R1.z=10)) and rand(1) < 0.1 order by R1.x limit 10
if (conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_TRANSPOSE)) {
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
// This should be a cost based decision, but till we enable the extended cost
// model, we will use the given value for the variable
final float reductionProportion = HiveConf.getFloatVar(conf,
HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_TRANSPOSE_REDUCTION_PERCENTAGE);
final long reductionTuples = HiveConf.getLongVar(conf,
HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_TRANSPOSE_REDUCTION_TUPLES);
- basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, HiveSortMergeRule.INSTANCE,
- HiveSortProjectTransposeRule.INSTANCE, HiveSortJoinReduceRule.INSTANCE,
- HiveSortUnionReduceRule.INSTANCE);
- basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, HepMatchOrder.BOTTOM_UP,
+ generatePartialProgram(program, true, HepMatchOrder.TOP_DOWN,
+ HiveSortMergeRule.INSTANCE, HiveSortProjectTransposeRule.INSTANCE,
+ HiveSortJoinReduceRule.INSTANCE, HiveSortUnionReduceRule.INSTANCE);
+ generatePartialProgram(program, true, HepMatchOrder.BOTTOM_UP,
new HiveSortRemoveRule(reductionProportion, reductionTuples),
HiveProjectSortTransposeRule.INSTANCE);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
- "Calcite: Prejoin ordering transformation, Push down limit through outer join");
}
- // 5. Push Down Semi Joins
+ // Push Down Semi Joins
//TODO: Enable this later
/*perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, SemiJoinJoinTransposeRule.INSTANCE,
@@ -2206,38 +1997,39 @@ public class CalcitePlanner extends SemanticAnalyzer {
perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
"Calcite: Prejoin ordering transformation, Push Down Semi Joins"); */
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- basePlan = hepPlan(basePlan, false, mdProvider, executorProvider,
+ // 5. Try to remove limit and order by
+ generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
HiveSortLimitRemoveRule.INSTANCE);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
- "Calcite: Trying to remove Limit and Order by");
// 6. Apply Partition Pruning
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- basePlan = hepPlan(basePlan, false, mdProvider, executorProvider, new HivePartitionPruneRule(conf));
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
- "Calcite: Prejoin ordering transformation, Partition Pruning");
+ generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
+ new HivePartitionPruneRule(conf));
// 7. Projection Pruning (this introduces select above TS & hence needs to be run last due to PP)
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null,
- HiveRelFactories.HIVE_BUILDER.create(cluster, null), true);
- basePlan = fieldTrimmer.trim(basePlan);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
- "Calcite: Prejoin ordering transformation, Projection Pruning");
+ generatePartialProgram(program, false, HepMatchOrder.TOP_DOWN,
+ new HiveFieldTrimmerRule(true));
// 8. Rerun PPD through Project as column pruning would have introduced
// DT above scans; By pushing filter just above TS, Hive can push it into
// storage (incase there are filters on non partition cols). This only
// matches FIL-PROJ-TS
// Also merge, remove and reduce Project if possible
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- basePlan = hepPlan(basePlan, true, mdProvider, executorProvider,
+ generatePartialProgram(program, true, HepMatchOrder.TOP_DOWN,
HiveFilterProjectTSTransposeRule.INSTANCE, HiveFilterProjectTSTransposeRule.INSTANCE_DRUID,
HiveProjectFilterPullUpConstantsRule.INSTANCE, HiveProjectMergeRule.INSTANCE,
ProjectRemoveRule.INSTANCE, HiveSortMergeRule.INSTANCE);
+
+ // 9. Get rid of sq_count_check if group by key is constant
+ if (conf.getBoolVar(ConfVars.HIVE_REMOVE_SQ_COUNT_CHECK)) {
+ generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
+ HiveRemoveSqCountCheck.INSTANCE);
+ }
+
+ // Trigger program
+ perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
+ basePlan = executeProgram(basePlan, program.build(), mdProvider, executorProvider);
perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
- "Calcite: Prejoin ordering transformation, Rerun PPD");
+ "Calcite: Prejoin ordering transformation");
return basePlan;
}
@@ -2332,14 +2124,13 @@ public class CalcitePlanner extends SemanticAnalyzer {
if (mvRebuild) {
// If it is a materialized view rebuild, we use the HepPlanner, since we only have
// one MV and we would like to use it to create incremental maintenance plans
- HepPlanner hepPlanner = createHepPlanner(basePlan.getCluster(), true, mdProvider, null,
- HepMatchOrder.TOP_DOWN, HiveMaterializedViewRule.MATERIALIZED_VIEW_REWRITING_RULES);
+ final HepProgramBuilder program = new HepProgramBuilder();
+ generatePartialProgram(program, true, HepMatchOrder.TOP_DOWN,
+ HiveMaterializedViewRule.MATERIALIZED_VIEW_REWRITING_RULES);
// Add materialization for rebuild to planner
assert materializations.size() == 1;
- hepPlanner.addMaterialization(materializations.get(0));
// Optimize plan
- hepPlanner.setRoot(basePlan);
- basePlan = hepPlanner.findBestExp();
+ basePlan = executeProgram(basePlan, program.build(), mdProvider, executorProvider, materializations);
} else {
// If this is not a rebuild, we use Volcano planner as the decision
// on whether to use MVs or not and which MVs to use should be cost-based
@@ -2391,15 +2182,17 @@ public class CalcitePlanner extends SemanticAnalyzer {
visitor.go(basePlan);
if (visitor.isRewritingAllowed()) {
// Trigger rewriting to remove UNION branch with MV
+ final HepProgramBuilder program = new HepProgramBuilder();
if (visitor.isContainsAggregate()) {
- basePlan = hepPlan(basePlan, false, mdProvider, null,
- HepMatchOrder.TOP_DOWN, HiveAggregateIncrementalRewritingRule.INSTANCE);
+ generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
+ HiveAggregateIncrementalRewritingRule.INSTANCE);
mvRebuildMode = MaterializationRebuildMode.AGGREGATE_REBUILD;
} else {
- basePlan = hepPlan(basePlan, false, mdProvider, null,
- HepMatchOrder.TOP_DOWN, HiveNoAggregateIncrementalRewritingRule.INSTANCE);
+ generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
+ HiveNoAggregateIncrementalRewritingRule.INSTANCE);
mvRebuildMode = MaterializationRebuildMode.NO_AGGREGATE_REBUILD;
}
+ basePlan = executeProgram(basePlan, program.build(), mdProvider, executorProvider);
}
}
} else if (useMaterializedViewsRegistry) {
@@ -2437,6 +2230,174 @@ public class CalcitePlanner extends SemanticAnalyzer {
return basePlan;
}
+ /**
+ * Perform join reordering optimization.
+ *
+ * @param basePlan
+ * original plan
+ * @param mdProvider
+ * meta data provider
+ * @param executorProvider
+ * executor
+ * @return
+ */
+ private RelNode applyJoinOrderingTransform(RelNode basePlan, RelMetadataProvider mdProvider, RexExecutor executorProvider) {
+ PerfLogger perfLogger = SessionState.getPerfLogger();
+
+ final HepProgramBuilder program = new HepProgramBuilder();
+ // Remove Projects between Joins so that JoinToMultiJoinRule can merge them to MultiJoin
+ generatePartialProgram(program, true, HepMatchOrder.BOTTOM_UP,
+ HiveJoinProjectTransposeRule.LEFT_PROJECT_BTW_JOIN, HiveJoinProjectTransposeRule.RIGHT_PROJECT_BTW_JOIN,
+ HiveProjectMergeRule.INSTANCE);
+ // Join reordering
+ generatePartialProgram(program, false, HepMatchOrder.BOTTOM_UP,
+ new JoinToMultiJoinRule(HiveJoin.class), new LoptOptimizeJoinRule(HiveRelFactories.HIVE_BUILDER));
+
+ perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
+ RelNode calciteOptimizedPlan;
+ try {
+ calciteOptimizedPlan = executeProgram(basePlan, program.build(), mdProvider, executorProvider);
+ } catch (Exception e) {
+ if (noColsMissingStats.get() > 0) {
+ LOG.warn("Missing column stats (see previous messages), skipping join reordering in CBO");
+ noColsMissingStats.set(0);
+ calciteOptimizedPlan = basePlan;
+ disableSemJoinReordering = false;
+ } else {
+ throw e;
+ }
+ }
+ perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Join Reordering");
+
+ return calciteOptimizedPlan;
+ }
+
+ /**
+ * Perform join reordering post-optimization.
+ *
+ * @param basePlan
+ * original plan
+ * @param mdProvider
+ * meta data provider
+ * @param executorProvider
+ * executor
+ * @return
+ */
+ private RelNode applyPostJoinOrderingTransform(RelNode basePlan, RelMetadataProvider mdProvider, RexExecutor executorProvider) {
+ PerfLogger perfLogger = SessionState.getPerfLogger();
+
+ final HepProgramBuilder program = new HepProgramBuilder();
+
+ // 1. Run other optimizations that do not need stats
+ generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
+ ProjectRemoveRule.INSTANCE, HiveUnionMergeRule.INSTANCE,
+ HiveAggregateProjectMergeRule.INSTANCE, HiveProjectMergeRule.INSTANCE_NO_FORCE,
+ HiveJoinCommuteRule.INSTANCE);
+
+ // 2. Run aggregate-join transpose (cost based)
+ // If it failed because of missing stats, we continue with
+ // the rest of optimizations
+ if (conf.getBoolVar(ConfVars.AGGR_JOIN_TRANSPOSE)) {
+ generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
+ new HiveAggregateJoinTransposeRule(noColsMissingStats));
+ }
+
+ // 3. Convert Join + GBy to semijoin
+ // Run this rule at later stages, since many calcite rules cant deal with semijoin
+ if (conf.getBoolVar(ConfVars.SEMIJOIN_CONVERSION)) {
+ generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
+ HiveSemiJoinRule.INSTANCE_PROJECT, HiveSemiJoinRule.INSTANCE_PROJECT_SWAPPED,
+ HiveSemiJoinRule.INSTANCE_AGGREGATE);
+ }
+
+ // 4. convert SemiJoin + GBy to SemiJoin
+ generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
+ HiveRemoveGBYSemiJoinRule.INSTANCE);
+
+ // 5. Run rule to fix windowing issue when it is done over
+ // aggregation columns (HIVE-10627)
+ if (profilesCBO.contains(ExtendedCBOProfile.WINDOWING_POSTPROCESSING)) {
+ generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
+ HiveWindowingFixRule.INSTANCE);
+ }
+
+ // 6. Sort predicates in filter expressions
+ if (conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_SORT_PREDS_WITH_STATS)) {
+ generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
+ new HiveFilterSortPredicates(noColsMissingStats));
+ }
+
+ // 7. Apply Druid transformation rules
+ generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
+ HiveDruidRules.FILTER_DATE_RANGE_RULE,
+ HiveDruidRules.FILTER, HiveDruidRules.PROJECT_FILTER_TRANSPOSE,
+ HiveDruidRules.AGGREGATE_FILTER_TRANSPOSE,
+ HiveDruidRules.AGGREGATE_PROJECT,
+ HiveDruidRules.PROJECT,
+ HiveDruidRules.EXPAND_SINGLE_DISTINCT_AGGREGATES_DRUID_RULE,
+ HiveDruidRules.AGGREGATE,
+ HiveDruidRules.POST_AGGREGATION_PROJECT,
+ HiveDruidRules.FILTER_AGGREGATE_TRANSPOSE,
+ HiveDruidRules.FILTER_PROJECT_TRANSPOSE,
+ HiveDruidRules.HAVING_FILTER_RULE,
+ HiveDruidRules.SORT_PROJECT_TRANSPOSE,
+ HiveDruidRules.SORT,
+ HiveDruidRules.PROJECT_SORT_TRANSPOSE);
+
+ // 8. Apply JDBC transformation rules
+ if (conf.getBoolVar(ConfVars.HIVE_ENABLE_JDBC_PUSHDOWN)) {
+ List<RelOptRule> rules = Lists.newArrayList();
+ rules.add(JDBCExpandExpressionsRule.FILTER_INSTANCE);
+ rules.add(JDBCExpandExpressionsRule.JOIN_INSTANCE);
+ rules.add(JDBCExpandExpressionsRule.PROJECT_INSTANCE);
+ rules.add(JDBCExtractJoinFilterRule.INSTANCE);
+ rules.add(JDBCAbstractSplitFilterRule.SPLIT_FILTER_ABOVE_JOIN);
+ rules.add(JDBCAbstractSplitFilterRule.SPLIT_FILTER_ABOVE_CONVERTER);
+ rules.add(JDBCFilterJoinRule.INSTANCE);
+ rules.add(JDBCFilterPushDownRule.INSTANCE);
+ rules.add(JDBCProjectPushDownRule.INSTANCE);
+ if (!conf.getBoolVar(ConfVars.HIVE_ENABLE_JDBC_SAFE_PUSHDOWN)) {
+ rules.add(JDBCJoinPushDownRule.INSTANCE);
+ rules.add(JDBCUnionPushDownRule.INSTANCE);
+ rules.add(JDBCAggregationPushDownRule.INSTANCE);
+ rules.add(JDBCSortPushDownRule.INSTANCE);
+ }
+ generatePartialProgram(program, true, HepMatchOrder.TOP_DOWN,
+ rules.toArray(new RelOptRule[rules.size()]));
+ }
+
+ // 9. Run rules to aid in translation from Calcite tree to Hive tree
+ if (HiveConf.getBoolVar(conf, ConfVars.HIVE_CBO_RETPATH_HIVEOP)) {
+ // 9.1. Merge join into multijoin operators (if possible)
+ generatePartialProgram(program, true, HepMatchOrder.BOTTOM_UP,
+ HiveJoinProjectTransposeRule.BOTH_PROJECT_INCLUDE_OUTER,
+ HiveJoinProjectTransposeRule.LEFT_PROJECT_INCLUDE_OUTER,
+ HiveJoinProjectTransposeRule.RIGHT_PROJECT_INCLUDE_OUTER,
+ HiveJoinToMultiJoinRule.INSTANCE, HiveProjectMergeRule.INSTANCE);
+ // The previous rules can pull up projections through join operators,
+ // thus we run the field trimmer again to push them back down
+ generatePartialProgram(program, false, HepMatchOrder.TOP_DOWN,
+ new HiveFieldTrimmerRule(false));
+ generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
+ ProjectRemoveRule.INSTANCE, new ProjectMergeRule(false, HiveRelFactories.HIVE_BUILDER));
+ generatePartialProgram(program, true, HepMatchOrder.TOP_DOWN,
+ HiveFilterProjectTSTransposeRule.INSTANCE, HiveFilterProjectTSTransposeRule.INSTANCE_DRUID,
+ HiveProjectFilterPullUpConstantsRule.INSTANCE);
+
+ // 9.2. Introduce exchange operators below join/multijoin operators
+ generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
+ HiveInsertExchange4JoinRule.EXCHANGE_BELOW_JOIN, HiveInsertExchange4JoinRule.EXCHANGE_BELOW_MULTIJOIN);
+ }
+
+ // Trigger program
+ perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
+ basePlan = executeProgram(basePlan, program.build(), mdProvider, executorProvider);
+ perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
+ "Calcite: Postjoin ordering transformation");
+
+ return basePlan;
+ }
+
private List<String> getTablesUsed(RelNode plan) {
List<String> tablesUsed = new ArrayList<>();
new RelVisitor() {
@@ -2483,52 +2444,51 @@ public class CalcitePlanner extends SemanticAnalyzer {
* @param followPlanChanges
* @param mdProvider
* @param executorProvider
+ * @param order
* @param rules
* @return optimized RelNode
*/
+ @Deprecated
private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges,
- RelMetadataProvider mdProvider, RexExecutor executorProvider, RelOptRule... rules) {
- return hepPlan(basePlan, followPlanChanges, mdProvider, executorProvider,
- HepMatchOrder.TOP_DOWN, rules);
+ RelMetadataProvider mdProvider, RexExecutor executorProvider, HepMatchOrder order,
+ RelOptRule... rules) {
+ final HepProgramBuilder programBuilder = new HepProgramBuilder();
+ generatePartialProgram(programBuilder, followPlanChanges, order, rules);
+ return executeProgram(basePlan, programBuilder.build(), mdProvider, executorProvider);
}
/**
- * Run the HEP Planner with the given rule set.
+ * Generate a HEP program with the given rule set.
*
- * @param basePlan
- * @param followPlanChanges
- * @param mdProvider
- * @param executorProvider
+ * @param isCollection
* @param order
* @param rules
- * @return optimized RelNode
+ * @return HEP program
*/
- private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges,
- RelMetadataProvider mdProvider, RexExecutor executorProvider,
- HepMatchOrder order, RelOptRule... rules) {
- HepPlanner planner = createHepPlanner(basePlan.getCluster(), followPlanChanges,
- mdProvider, executorProvider, order, rules);
- planner.setRoot(basePlan);
- return planner.findBestExp();
- }
-
- private HepPlanner createHepPlanner(RelOptCluster cluster, boolean followPlanChanges,
- RelMetadataProvider mdProvider, RexExecutor executorProvider, HepMatchOrder order,
+ private void generatePartialProgram(HepProgramBuilder programBuilder, boolean isCollection, HepMatchOrder order,
RelOptRule... rules) {
- HepProgramBuilder programBuilder = new HepProgramBuilder();
- if (followPlanChanges) {
- programBuilder.addMatchOrder(order);
- programBuilder = programBuilder.addRuleCollection(ImmutableList.copyOf(rules));
+ programBuilder.addMatchOrder(order);
+ if (isCollection) {
+ programBuilder.addRuleCollection(ImmutableList.copyOf(rules));
} else {
- // TODO: Should this be also TOP_DOWN?
for (RelOptRule r : rules) {
programBuilder.addRuleInstance(r);
}
}
+ }
+
+ private RelNode executeProgram(RelNode basePlan, HepProgram program,
+ RelMetadataProvider mdProvider, RexExecutor executorProvider) {
+ return executeProgram(basePlan, program, mdProvider, executorProvider, null);
+ }
+
+ private RelNode executeProgram(RelNode basePlan, HepProgram program,
+ RelMetadataProvider mdProvider, RexExecutor executorProvider,
+ List<RelOptMaterialization> materializations) {
// Create planner and copy context
- HepPlanner planner = new HepPlanner(programBuilder.build(),
- cluster.getPlanner().getContext());
+ HepPlanner planner = new HepPlanner(program,
+ basePlan.getCluster().getPlanner().getContext());
List<RelMetadataProvider> list = Lists.newArrayList();
list.add(mdProvider);
@@ -2544,7 +2504,16 @@ public class CalcitePlanner extends SemanticAnalyzer {
planner.setExecutor(executorProvider);
}
- return planner;
+ if (materializations != null) {
+ // Add materializations to planner
+ for (RelOptMaterialization materialization : materializations) {
+ planner.addMaterialization(materialization);
+ }
+ }
+
+ planner.setRoot(basePlan);
+
+ return planner.findBestExp();
}
@SuppressWarnings("nls")
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_ext_query1.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_ext_query1.q.out
index a87cfd5..f318110 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_ext_query1.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_ext_query1.q.out
@@ -68,7 +68,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]): rowcount = ###Masked###, cum
HiveProject(s_store_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
HiveFilter(condition=[AND(=($24, _UTF-16LE'NM'), IS NOT NULL($0))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
HiveTableScan(table=[[default, store]], table:alias=[store]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
- HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject($f0=[$0], $f1=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
HiveFilter(condition=[IS NOT NULL($2)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
@@ -161,7 +161,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]): rowcount = ###Masked###, cum
HiveProject(s_store_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
HiveFilter(condition=[AND(=($24, _UTF-16LE'NM'), IS NOT NULL($0))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
HiveTableScan(table=[[default, store]], table:alias=[store]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
- HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject($f0=[$0], $f1=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
HiveFilter(condition=[IS NOT NULL($2)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[{5.175767820386722E7 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query1.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query1.q.out
index 5fb4a98..701ffb1 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query1.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query1.q.out
@@ -68,7 +68,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100])
HiveProject(s_store_sk=[$0])
HiveFilter(condition=[AND(=($24, _UTF-16LE'NM'), IS NOT NULL($0))])
HiveTableScan(table=[[default, store]], table:alias=[store])
- HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2])
+ HiveProject($f0=[$0], $f1=[$1], $f2=[$2])
HiveFilter(condition=[IS NOT NULL($2)])
HiveAggregate(group=[{1, 2}], agg#0=[sum($3)])
HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available])
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query14.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query14.q.out
index dfb4c33..9167a9c 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query14.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query14.q.out
@@ -227,7 +227,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
HiveUnion(all=[true])
HiveProject(channel=[_UTF-16LE'store':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], sales=[$3], number_sales=[$4])
HiveJoin(condition=[>($3, $5)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3], $f4=[$4])
+ HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4])
HiveFilter(condition=[IS NOT NULL($3)])
HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($3)], agg#1=[count()])
HiveProject(i_brand_id=[$1], i_class_id=[$2], i_category_id=[$3], $f3=[*(CAST($6):DECIMAL(10, 0), $7)])
@@ -251,7 +251,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
HiveProject($f0=[$0], $f1=[$1], $f2=[$2])
HiveFilter(condition=[=($3, 3)])
HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)])
- HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3])
+ HiveProject(brand_id=[$0], class_id=[$1], category_id=[$2], $f3=[$3])
HiveUnion(all=[true])
HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3])
HiveAggregate(group=[{4, 5, 6}], agg#0=[count()])
@@ -324,7 +324,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
HiveProject(channel=[_UTF-16LE'catalog':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], sales=[$3], number_sales=[$4])
HiveJoin(condition=[>($3, $5)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3], $f4=[$4])
+ HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4])
HiveFilter(condition=[IS NOT NULL($3)])
HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($3)], agg#1=[count()])
HiveProject(i_brand_id=[$1], i_class_id=[$2], i_category_id=[$3], $f3=[*(CAST($6):DECIMAL(10, 0), $7)])
@@ -348,7 +348,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
HiveProject($f0=[$0], $f1=[$1], $f2=[$2])
HiveFilter(condition=[=($3, 3)])
HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)])
- HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3])
+ HiveProject(brand_id=[$0], class_id=[$1], category_id=[$2], $f3=[$3])
HiveUnion(all=[true])
HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3])
HiveAggregate(group=[{4, 5, 6}], agg#0=[count()])
@@ -421,7 +421,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
HiveProject(channel=[_UTF-16LE'web':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], sales=[$3], number_sales=[$4])
HiveJoin(condition=[>($3, $5)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3], $f4=[$4])
+ HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4])
HiveFilter(condition=[IS NOT NULL($3)])
HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($3)], agg#1=[count()])
HiveProject(i_brand_id=[$1], i_class_id=[$2], i_category_id=[$3], $f3=[*(CAST($6):DECIMAL(10, 0), $7)])
@@ -445,7 +445,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
HiveProject($f0=[$0], $f1=[$1], $f2=[$2])
HiveFilter(condition=[=($3, 3)])
HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)])
- HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3])
+ HiveProject(brand_id=[$0], class_id=[$1], category_id=[$2], $f3=[$3])
HiveUnion(all=[true])
HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3])
HiveAggregate(group=[{4, 5, 6}], agg#0=[count()])
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out
index 6aaf3a0..bf8a275 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out
@@ -123,7 +123,7 @@ HiveAggregate(group=[{}], agg#0=[sum($0)])
HiveProject(sales=[*(CAST($4):DECIMAL(10, 0), $5)])
HiveSemiJoin(condition=[=($2, $7)], joinType=[inner])
HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(i_item_sk=[$0])
+ HiveProject($f1=[$0])
HiveAggregate(group=[{1}])
HiveFilter(condition=[>($3, 4)])
HiveProject(substr=[$2], i_item_sk=[$1], d_date=[$0], $f3=[$3])
@@ -146,9 +146,9 @@ HiveAggregate(group=[{}], agg#0=[sum($0)])
HiveProject(d_date_sk=[$0])
HiveFilter(condition=[AND(=($6, 1999), =($8, 1), IS NOT NULL($0))])
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
- HiveProject(c_customer_sk=[$0])
+ HiveProject($f0=[$0])
HiveJoin(condition=[>($1, $2)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(c_customer_sk=[$0], $f1=[$1])
+ HiveProject($f0=[$0], $f1=[$1])
HiveFilter(condition=[IS NOT NULL($1)])
HiveAggregate(group=[{2}], agg#0=[sum($1)])
HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available])
@@ -178,7 +178,7 @@ HiveAggregate(group=[{}], agg#0=[sum($0)])
HiveProject(sales=[*(CAST($4):DECIMAL(10, 0), $5)])
HiveSemiJoin(condition=[=($3, $7)], joinType=[inner])
HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(i_item_sk=[$0])
+ HiveProject($f1=[$0])
HiveAggregate(group=[{1}])
HiveFilter(condition=[>($3, 4)])
HiveProject(substr=[$2], i_item_sk=[$1], d_date=[$0], $f3=[$3])
@@ -201,9 +201,9 @@ HiveAggregate(group=[{}], agg#0=[sum($0)])
HiveProject(d_date_sk=[$0])
HiveFilter(condition=[AND(=($6, 1999), =($8, 1), IS NOT NULL($0))])
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
- HiveProject(c_customer_sk=[$0])
+ HiveProject($f0=[$0])
HiveJoin(condition=[>($1, $2)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(c_customer_sk=[$0], $f1=[$1])
+ HiveProject($f0=[$0], $f1=[$1])
HiveFilter(condition=[IS NOT NULL($1)])
HiveAggregate(group=[{2}], agg#0=[sum($1)])
HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available])
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query24.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query24.q.out
index 5939b3c..287f03b 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query24.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query24.q.out
@@ -116,7 +116,7 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
CBO PLAN:
HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3])
HiveJoin(condition=[>($3, $4)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(c_last_name=[$0], c_first_name=[$1], s_store_name=[$2], $f3=[$3])
+ HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3])
HiveFilter(condition=[IS NOT NULL($3)])
HiveProject(c_last_name=[$1], c_first_name=[$0], s_store_name=[$2], $f3=[$3])
HiveAggregate(group=[{4, 5, 7}], agg#0=[sum($9)])
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query30.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query30.q.out
index c44ab4d..e1cc171 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query30.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query30.q.out
@@ -79,9 +79,9 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], sort4=[$4], sort5=
HiveProject(ca_address_sk=[$0])
HiveFilter(condition=[AND(=($8, _UTF-16LE'IL'), IS NOT NULL($0))])
HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address])
- HiveProject(wr_returning_customer_sk=[$0], ca_state=[$1], $f2=[$2], _o__c0=[$3], ctr_state=[$4])
+ HiveProject($f0=[$0], $f1=[$1], $f2=[$2], _o__c0=[$3], ctr_state=[$4])
HiveJoin(condition=[AND(=($1, $4), >($2, $3))], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(wr_returning_customer_sk=[$0], ca_state=[$1], $f2=[$2])
+ HiveProject($f0=[$0], $f1=[$1], $f2=[$2])
HiveFilter(condition=[IS NOT NULL($2)])
HiveProject(wr_returning_customer_sk=[$1], ca_state=[$0], $f2=[$2])
HiveAggregate(group=[{1, 3}], agg#0=[sum($5)])
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query31.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query31.q.out
index 24925a7..5fd5f94 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query31.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query31.q.out
@@ -154,7 +154,7 @@ HiveProject(ca_county=[$8], d_year=[CAST(2000):INTEGER], web_q1_q2_increase=[/($
HiveProject(d_date_sk=[$0])
HiveFilter(condition=[AND(=($6, 2000), =($10, 2), IS NOT NULL($0))])
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
- HiveProject(ca_county=[$0], $f1=[$1], ca_county0=[$2], $f10=[$3], ca_county1=[$4], $f11=[$5])
+ HiveProject($f0=[$0], $f1=[$1], $f00=[$2], $f10=[$3], $f01=[$4], $f11=[$5])
HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(ca_county=[$0], $f1=[$1])
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query33.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query33.q.out
index a47db4e..8419b70 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query33.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query33.q.out
@@ -162,9 +162,9 @@ POSTHOOK: Input: default@web_sales
POSTHOOK: Output: hdfs://### HDFS PATH ###
CBO PLAN:
HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100])
- HiveProject(i_manufact_id=[$0], $f1=[$1])
+ HiveProject($f0=[$0], $f1=[$1])
HiveAggregate(group=[{0}], agg#0=[sum($1)])
- HiveProject(i_manufact_id=[$0], $f1=[$1])
+ HiveProject($f0=[$0], $f1=[$1])
HiveUnion(all=[true])
HiveProject(i_manufact_id=[$0], $f1=[$1])
HiveAggregate(group=[{1}], agg#0=[sum($7)])
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query34.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query34.q.out
index 0c77719..817b82f 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query34.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query34.q.out
@@ -77,7 +77,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
HiveProject(c_customer_sk=[$0], c_salutation=[$7], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10])
HiveFilter(condition=[IS NOT NULL($0)])
HiveTableScan(table=[[default, customer]], table:alias=[customer])
- HiveProject(ss_ticket_number=[$0], ss_customer_sk=[$1], $f2=[$2])
+ HiveProject($f0=[$0], $f1=[$1], $f2=[$2])
HiveFilter(condition=[BETWEEN(false, $2, 15:BIGINT, 20:BIGINT)])
HiveProject(ss_ticket_number=[$1], ss_customer_sk=[$0], $f2=[$2])
HiveAggregate(group=[{1, 4}], agg#0=[count()])
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query38.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query38.q.out
index e0ac565..1954043 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query38.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query38.q.out
@@ -56,10 +56,10 @@ POSTHOOK: Input: default@web_sales
POSTHOOK: Output: hdfs://### HDFS PATH ###
CBO PLAN:
HiveAggregate(group=[{}], agg#0=[count()])
- HiveProject(c_last_name=[$0], c_first_name=[$1], d_date=[$2], $f3=[$3])
+ HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3])
HiveFilter(condition=[=($3, 3)])
HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)])
- HiveProject(c_last_name=[$0], c_first_name=[$1], d_date=[$2], $f3=[$3])
+ HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3])
HiveUnion(all=[true])
HiveProject(c_last_name=[$1], c_first_name=[$0], d_date=[$2], $f3=[$3])
HiveAggregate(group=[{0, 1, 2}], agg#0=[count()])
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out
index af2f1cc..e96d4c6 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out
@@ -148,7 +148,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100])
HiveProject(d_date_sk=[$0], d_month_seq=[$3])
HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))])
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
- HiveProject(ca_address_sk=[$0], ca_county=[$1], ca_state=[$2], s_county=[$3], s_state=[$4], c_customer_sk=[$5], c_current_addr_sk=[$6])
+ HiveProject(ca_address_sk=[$0], ca_county=[$1], ca_state=[$2], s_county=[$3], s_state=[$4], $f0=[$5], $f1=[$6])
HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available])
HiveJoin(condition=[AND(=($1, $3), =($2, $4))], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(ca_address_sk=[$0], ca_county=[$7], ca_state=[$8])
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query56.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query56.q.out
index 3ee0356..8a3780b 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query56.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query56.q.out
@@ -148,9 +148,9 @@ POSTHOOK: Input: default@web_sales
POSTHOOK: Output: hdfs://### HDFS PATH ###
CBO PLAN:
HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100])
- HiveProject(i_item_id=[$0], $f1=[$1])
+ HiveProject($f0=[$0], $f1=[$1])
HiveAggregate(group=[{0}], agg#0=[sum($1)])
- HiveProject(i_item_id=[$0], $f1=[$1])
+ HiveProject($f0=[$0], $f1=[$1])
HiveUnion(all=[true])
HiveProject(i_item_id=[$0], $f1=[$1])
HiveAggregate(group=[{1}], agg#0=[sum($7)])
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query60.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query60.q.out
index fb4ca86..8802220 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query60.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query60.q.out
@@ -168,9 +168,9 @@ POSTHOOK: Input: default@web_sales
POSTHOOK: Output: hdfs://### HDFS PATH ###
CBO PLAN:
HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100])
- HiveProject(i_item_id=[$0], $f1=[$1])
+ HiveProject($f0=[$0], $f1=[$1])
HiveAggregate(group=[{0}], agg#0=[sum($1)])
- HiveProject(i_item_id=[$0], $f1=[$1])
+ HiveProject($f0=[$0], $f1=[$1])
HiveUnion(all=[true])
HiveProject(i_item_id=[$0], $f1=[$1])
HiveAggregate(group=[{1}], agg#0=[sum($7)])
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query65.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query65.q.out
index e739cb8..5d7486c 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query65.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query65.q.out
@@ -76,7 +76,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100])
HiveFilter(condition=[IS NOT NULL($0)])
HiveTableScan(table=[[default, store]], table:alias=[store])
HiveJoin(condition=[AND(=($3, $0), <=($2, $4))], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(ss_store_sk=[$0], ss_item_sk=[$1], $f2=[$2])
+ HiveProject($f0=[$0], $f1=[$1], $f2=[$2])
HiveFilter(condition=[IS NOT NULL($2)])
HiveProject(ss_store_sk=[$1], ss_item_sk=[$0], $f2=[$2])
HiveAggregate(group=[{1, 2}], agg#0=[sum($3)])
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query73.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query73.q.out
index e5e2858..671959d 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query73.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query73.q.out
@@ -71,7 +71,7 @@ HiveSortLimit(sort0=[$5], dir0=[DESC-nulls-last])
HiveProject(c_customer_sk=[$0], c_salutation=[$7], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10])
HiveFilter(condition=[IS NOT NULL($0)])
HiveTableScan(table=[[default, customer]], table:alias=[customer])
- HiveProject(ss_ticket_number=[$0], ss_customer_sk=[$1], $f2=[$2])
+ HiveProject($f0=[$0], $f1=[$1], $f2=[$2])
HiveFilter(condition=[BETWEEN(false, $2, 1:BIGINT, 5:BIGINT)])
HiveProject(ss_ticket_number=[$1], ss_customer_sk=[$0], $f2=[$2])
HiveAggregate(group=[{1, 4}], agg#0=[count()])
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query78.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query78.q.out
index 6bc9ff3..e5a423d 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query78.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query78.q.out
@@ -150,7 +150,7 @@ HiveSortLimit(fetch=[100])
HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9])
HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($2))])
HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns])
- HiveProject(ws_item_sk=[$0], ws_bill_customer_sk=[$1], $f2=[$2], $f3=[$3], $f4=[$4])
+ HiveProject($f1=[$0], $f2=[$1], $f2_0=[$2], $f3=[$3], $f4=[$4])
HiveFilter(condition=[>($2, 0)])
HiveAggregate(group=[{2, 3}], agg#0=[sum($4)], agg#1=[sum($5)], agg#2=[sum($6)])
HiveJoin(condition=[=($1, $0)], joinType=[inner], algorithm=[none], cost=[not available])
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query81.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query81.q.out
index efe0661..0ce5feb 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query81.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query81.q.out
@@ -80,9 +80,9 @@ HiveProject(c_customer_id=[$0], c_salutation=[$1], c_first_name=[$2], c_last_nam
HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_street_type=[$4], ca_suite_number=[$5], ca_city=[$6], ca_county=[$7], ca_zip=[$9], ca_country=[$10], ca_gmt_offset=[$11], ca_location_type=[$12])
HiveFilter(condition=[AND(=($8, _UTF-16LE'IL'), IS NOT NULL($0))])
HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address])
- HiveProject(cr_returning_customer_sk=[$0], ca_state=[$1], $f2=[$2], _o__c0=[$3], ctr_state=[$4])
+ HiveProject($f0=[$0], $f1=[$1], $f2=[$2], _o__c0=[$3], ctr_state=[$4])
HiveJoin(condition=[AND(=($1, $4), >($2, $3))], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(cr_returning_customer_sk=[$0], ca_state=[$1], $f2=[$2])
+ HiveProject($f0=[$0], $f1=[$1], $f2=[$2])
HiveFilter(condition=[IS NOT NULL($2)])
HiveProject(cr_returning_customer_sk=[$1], ca_state=[$0], $f2=[$2])
HiveAggregate(group=[{1, 3}], agg#0=[sum($5)])
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_ext_query1.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_ext_query1.q.out
index bdd0085..ac1394e 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_ext_query1.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_ext_query1.q.out
@@ -67,7 +67,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]): rowcount = ###Masked###, cum
HiveProject(s_store_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
HiveFilter(condition=[=($24, _UTF-16LE'NM')]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
HiveTableScan(table=[[default, store]], table:alias=[store]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
- HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject($f0=[$0], $f1=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
HiveFilter(condition=[IS NOT NULL($2)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
@@ -159,7 +159,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]): rowcount = ###Masked###, cum
HiveProject(s_store_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
HiveFilter(condition=[=($24, _UTF-16LE'NM')]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
HiveTableScan(table=[[default, store]], table:alias=[store]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
- HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject($f0=[$0], $f1=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
HiveFilter(condition=[IS NOT NULL($2)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[{5.175767820386722E7 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query1.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query1.q.out
index 95ee6e5..6eb5a95 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query1.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query1.q.out
@@ -67,7 +67,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100])
HiveProject(s_store_sk=[$0])
HiveFilter(condition=[=($24, _UTF-16LE'NM')])
HiveTableScan(table=[[default, store]], table:alias=[store])
- HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2])
+ HiveProject($f0=[$0], $f1=[$1], $f2=[$2])
HiveFilter(condition=[IS NOT NULL($2)])
HiveAggregate(group=[{1, 2}], agg#0=[sum($3)])
HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available])
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query14.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query14.q.out
index 88e5ea0..43e1b2b 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query14.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query14.q.out
@@ -243,7 +243,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
HiveProject($f0=[$0], $f1=[$1], $f2=[$2])
HiveFilter(condition=[=($3, 3)])
HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)])
- HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3])
+ HiveProject(brand_id=[$0], class_id=[$1], category_id=[$2], $f3=[$3])
HiveUnion(all=[true])
HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3])
HiveAggregate(group=[{4, 5, 6}], agg#0=[count()])
@@ -339,7 +339,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
HiveProject($f0=[$0], $f1=[$1], $f2=[$2])
HiveFilter(condition=[=($3, 3)])
HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)])
- HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3])
+ HiveProject(brand_id=[$0], class_id=[$1], category_id=[$2], $f3=[$3])
HiveUnion(all=[true])
HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3])
HiveAggregate(group=[{4, 5, 6}], agg#0=[count()])
@@ -435,7 +435,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
HiveProject($f0=[$0], $f1=[$1], $f2=[$2])
HiveFilter(condition=[=($3, 3)])
HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)])
- HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3])
+ HiveProject(brand_id=[$0], class_id=[$1], category_id=[$2], $f3=[$3])
HiveUnion(all=[true])
HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3])
HiveAggregate(group=[{4, 5, 6}], agg#0=[count()])
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out
index dfa794d..ca7c3b3 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out
@@ -152,7 +152,7 @@ HiveAggregate(group=[{}], agg#0=[sum($0)])
HiveProject(d_date_sk=[$0])
HiveFilter(condition=[AND(=($6, 1999), =($8, 1))])
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
- HiveProject(i_item_sk=[$0])
+ HiveProject($f1=[$0])
HiveFilter(condition=[>($2, 4)])
HiveProject(i_item_sk=[$1], d_date=[$0], $f2=[$2])
HiveAggregate(group=[{3, 4}], agg#0=[count()])
@@ -198,7 +198,7 @@ HiveAggregate(group=[{}], agg#0=[sum($0)])
HiveProject(d_date_sk=[$0])
HiveFilter(condition=[AND(=($6, 1999), =($8, 1))])
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
- HiveProject(i_item_sk=[$0])
+ HiveProject($f1=[$0])
HiveFilter(condition=[>($2, 4)])
HiveProject(i_item_sk=[$1], d_date=[$0], $f2=[$2])
HiveAggregate(group=[{3, 4}], agg#0=[count()])
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out
index ddcf036..4fe5b3a 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out
@@ -116,7 +116,7 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
CBO PLAN:
HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3])
HiveJoin(condition=[>($3, $4)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(c_last_name=[$0], c_first_name=[$1], s_store_name=[$2], $f3=[$3])
+ HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3])
HiveFilter(condition=[IS NOT NULL($3)])
HiveProject(c_last_name=[$1], c_first_name=[$0], s_store_name=[$2], $f3=[$3])
HiveAggregate(group=[{0, 1, 3}], agg#0=[sum($9)])
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query30.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query30.q.out
index d6dd7fe..b52604b 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query30.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query30.q.out
@@ -79,9 +79,9 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], sort4=[$4], sort5=
HiveProject(ca_address_sk=[$0])
HiveFilter(condition=[=($8, _UTF-16LE'IL')])
HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address])
- HiveProject(wr_returning_customer_sk=[$0], ca_state=[$1], $f2=[$2], _o__c0=[$3], ctr_state=[$4])
+ HiveProject($f0=[$0], $f1=[$1], $f2=[$2], _o__c0=[$3], ctr_state=[$4])
HiveJoin(condition=[AND(=($1, $4), >($2, $3))], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(wr_returning_customer_sk=[$0], ca_state=[$1], $f2=[$2])
+ HiveProject($f0=[$0], $f1=[$1], $f2=[$2])
HiveFilter(condition=[IS NOT NULL($2)])
HiveProject(wr_returning_customer_sk=[$1], ca_state=[$0], $f2=[$2])
HiveAggregate(group=[{1, 3}], agg#0=[sum($5)])
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query31.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query31.q.out
index 6933573..ac1764a 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query31.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query31.q.out
@@ -154,7 +154,7 @@ HiveProject(ca_county=[$8], d_year=[CAST(2000):INTEGER], web_q1_q2_increase=[/($
HiveProject(d_date_sk=[$0])
HiveFilter(condition=[AND(=($6, 2000), =($10, 2))])
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
- HiveProject(ca_county=[$0], $f1=[$1], ca_county0=[$2], $f10=[$3], ca_county1=[$4], $f11=[$5])
+ HiveProject($f0=[$0], $f1=[$1], $f00=[$2], $f10=[$3], $f01=[$4], $f11=[$5])
HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(ca_county=[$0], $f1=[$1])
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query33.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query33.q.out
index d917a04..673dc3d 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query33.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query33.q.out
@@ -162,9 +162,9 @@ POSTHOOK: Input: default@web_sales
POSTHOOK: Output: hdfs://### HDFS PATH ###
CBO PLAN:
HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100])
- HiveProject(i_manufact_id=[$0], $f1=[$1])
+ HiveProject($f0=[$0], $f1=[$1])
HiveAggregate(group=[{0}], agg#0=[sum($1)])
- HiveProject(i_manufact_id=[$0], $f1=[$1])
+ HiveProject($f0=[$0], $f1=[$1])
HiveUnion(all=[true])
HiveProject(i_manufact_id=[$0], $f1=[$1])
HiveAggregate(group=[{1}], agg#0=[sum($7)])
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query34.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query34.q.out
index bb0a571..c7520fa 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query34.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query34.q.out
@@ -76,7 +76,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(c_customer_sk=[$0], c_salutation=[$7], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10])
HiveTableScan(table=[[default, customer]], table:alias=[customer])
- HiveProject(ss_ticket_number=[$0], ss_customer_sk=[$1], $f2=[$2])
+ HiveProject($f0=[$0], $f1=[$1], $f2=[$2])
HiveFilter(condition=[BETWEEN(false, $2, 15:BIGINT, 20:BIGINT)])
HiveProject(ss_ticket_number=[$1], ss_customer_sk=[$0], $f2=[$2])
HiveAggregate(group=[{1, 4}], agg#0=[count()])
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query38.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query38.q.out
index 87a7121..1566387 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query38.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query38.q.out
@@ -56,10 +56,10 @@ POSTHOOK: Input: default@web_sales
POSTHOOK: Output: hdfs://### HDFS PATH ###
CBO PLAN:
HiveAggregate(group=[{}], agg#0=[count()])
- HiveProject(c_last_name=[$0], c_first_name=[$1], d_date=[$2], $f3=[$3])
+ HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3])
HiveFilter(condition=[=($3, 3)])
HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)])
- HiveProject(c_last_name=[$0], c_first_name=[$1], d_date=[$2], $f3=[$3])
+ HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3])
HiveUnion(all=[true])
HiveProject(c_last_name=[$1], c_first_name=[$0], d_date=[$2], $f3=[$3])
HiveAggregate(group=[{0, 1, 2}], agg#0=[count()])
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query54.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query54.q.out
index 5da4713..4f3965b 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query54.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query54.q.out
@@ -161,7 +161,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100])
HiveProject($f0=[+($3, 1)])
HiveFilter(condition=[AND(=($6, 1999), =($8, 3))])
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
- HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$1], ca_address_sk=[$2], ca_county=[$3], ca_state=[$4], s_county=[$5], s_state=[$6])
+ HiveProject($f0=[$0], $f1=[$1], ca_address_sk=[$2], ca_county=[$3], ca_state=[$4], s_county=[$5], s_state=[$6])
HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$1])
HiveAggregate(group=[{0, 1}])
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query56.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query56.q.out
index e1b2fd3..e94e998 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query56.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query56.q.out
@@ -148,9 +148,9 @@ POSTHOOK: Input: default@web_sales
POSTHOOK: Output: hdfs://### HDFS PATH ###
CBO PLAN:
HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100])
- HiveProject(i_item_id=[$0], $f1=[$1])
+ HiveProject($f0=[$0], $f1=[$1])
HiveAggregate(group=[{0}], agg#0=[sum($1)])
- HiveProject(i_item_id=[$0], $f1=[$1])
+ HiveProject($f0=[$0], $f1=[$1])
HiveUnion(all=[true])
HiveProject(i_item_id=[$0], $f1=[$1])
HiveAggregate(group=[{1}], agg#0=[sum($7)])
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query6.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query6.q.out
index c78b94b..eb872d7 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query6.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query6.q.out
@@ -97,7 +97,7 @@ HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100])
HiveAggregate(group=[{12}], agg#0=[sum($5)], agg#1=[count($5)])
HiveFilter(condition=[IS NOT NULL($12)])
HiveTableScan(table=[[default, item]], table:alias=[j])
- HiveProject(d_date_sk=[$0], d_month_seq=[$1], d_month_seq0=[$2])
+ HiveProject(d_date_sk=[$0], d_month_seq=[$1], $f0=[$2])
HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(d_date_sk=[$0], d_month_seq=[$3])
HiveFilter(condition=[IS NOT NULL($3)])
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query60.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query60.q.out
index be97f9d..51f2ad9 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query60.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query60.q.out
@@ -168,9 +168,9 @@ POSTHOOK: Input: default@web_sales
POSTHOOK: Output: hdfs://### HDFS PATH ###
CBO PLAN:
HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100])
- HiveProject(i_item_id=[$0], $f1=[$1])
+ HiveProject($f0=[$0], $f1=[$1])
HiveAggregate(group=[{0}], agg#0=[sum($1)])
- HiveProject(i_item_id=[$0], $f1=[$1])
+ HiveProject($f0=[$0], $f1=[$1])
HiveUnion(all=[true])
HiveProject(i_item_id=[$0], $f1=[$1])
HiveAggregate(group=[{1}], agg#0=[sum($7)])
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query65.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query65.q.out
index 4e4bfcf..6028a5c 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query65.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query65.q.out
@@ -72,7 +72,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100])
HiveProject(s_store_sk=[$0], s_store_name=[$5])
HiveTableScan(table=[[default, store]], table:alias=[store])
HiveJoin(condition=[AND(=($3, $0), <=($2, $4))], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(ss_store_sk=[$0], ss_item_sk=[$1], $f2=[$2])
+ HiveProject($f0=[$0], $f1=[$1], $f2=[$2])
HiveFilter(condition=[IS NOT NULL($2)])
HiveProject(ss_store_sk=[$1], ss_item_sk=[$0], $f2=[$2])
HiveAggregate(group=[{1, 2}], agg#0=[sum($3)])
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query73.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query73.q.out
index 1f814a9..cb95169 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query73.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query73.q.out
@@ -70,7 +70,7 @@ HiveSortLimit(sort0=[$5], dir0=[DESC-nulls-last])
HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(c_customer_sk=[$0], c_salutation=[$7], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10])
HiveTableScan(table=[[default, customer]], table:alias=[customer])
- HiveProject(ss_ticket_number=[$0], ss_customer_sk=[$1], $f2=[$2])
+ HiveProject($f0=[$0], $f1=[$1], $f2=[$2])
HiveFilter(condition=[BETWEEN(false, $2, 1:BIGINT, 5:BIGINT)])
HiveProject(ss_ticket_number=[$1], ss_customer_sk=[$0], $f2=[$2])
HiveAggregate(group=[{1, 4}], agg#0=[count()])
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query78.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query78.q.out
index 94bfba4..71c8423 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query78.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query78.q.out
@@ -165,7 +165,7 @@ HiveSortLimit(fetch=[100])
HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales])
HiveProject(cr_item_sk=[$2], cr_order_number=[$16])
HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns])
- HiveProject(ws_item_sk=[$0], ws_bill_customer_sk=[$1], $f2=[$2], $f3=[$3], $f4=[$4])
+ HiveProject($f1=[$0], $f2=[$1], $f2_0=[$2], $f3=[$3], $f4=[$4])
HiveFilter(condition=[>($2, 0)])
HiveAggregate(group=[{2, 3}], agg#0=[sum($4)], agg#1=[sum($5)], agg#2=[sum($6)])
HiveJoin(condition=[=($1, $0)], joinType=[inner], algorithm=[none], cost=[not available])
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query81.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query81.q.out
index b3c8f03..f94e41d 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query81.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query81.q.out
@@ -80,9 +80,9 @@ HiveProject(c_customer_id=[$0], c_salutation=[$1], c_first_name=[$2], c_last_nam
HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_street_type=[$4], ca_suite_number=[$5], ca_city=[$6], ca_county=[$7], ca_zip=[$9], ca_country=[$10], ca_gmt_offset=[$11], ca_location_type=[$12])
HiveFilter(condition=[=($8, _UTF-16LE'IL')])
HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address])
- HiveProject(cr_returning_customer_sk=[$0], ca_state=[$1], $f2=[$2], _o__c0=[$3], ctr_state=[$4])
+ HiveProject($f0=[$0], $f1=[$1], $f2=[$2], _o__c0=[$3], ctr_state=[$4])
HiveJoin(condition=[AND(=($1, $4), >($2, $3))], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(cr_returning_customer_sk=[$0], ca_state=[$1], $f2=[$2])
+ HiveProject($f0=[$0], $f1=[$1], $f2=[$2])
HiveFilter(condition=[IS NOT NULL($2)])
HiveProject(cr_returning_customer_sk=[$1], ca_state=[$0], $f2=[$2])
HiveAggregate(group=[{1, 3}], agg#0=[sum($5)])