You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2015/09/18 22:35:30 UTC
[25/41] hive git commit: HIVE-11678 : Add AggregateProjectMergeRule (Ashutosh Chauhan via Jesus Camacho Rodriguez)

HIVE-11678 : Add AggregateProjectMergeRule (Ashutosh Chauhan via Jesus Camacho Rodriguez)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1cce5f00
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1cce5f00
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1cce5f00

Branch: refs/heads/llap
Commit: 1cce5f006c595e67a4169851ceb352646759bc27
Parents: 201b1a0
Author: Ashutosh Chauhan <ha...@apache.org>
Authored: Wed Sep 16 09:41:25 2015 -0700
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Wed Sep 16 09:41:25 2015 -0700

----------------------------------------------------------------------
 .../rules/HiveAggregateProjectMergeRule.java    |  151 ++
 .../calcite/rules/HiveRelFieldTrimmer.java      |  145 +-
 .../translator/PlanModifierForASTConv.java      |    4 +-
 .../hadoop/hive/ql/parse/CalcitePlanner.java    |    2 +
 .../alter_partition_coltype.q.out               |    8 +-
 .../clientpositive/annotate_stats_groupby.q.out |  106 +-
 .../annotate_stats_groupby2.q.out               |   28 +-
 .../results/clientpositive/auto_join18.q.out    |   12 +-
 .../auto_join18_multi_distinct.q.out            |   12 +-
 .../results/clientpositive/auto_join27.q.out    |   18 +-
 .../results/clientpositive/auto_join32.q.out    |    4 +-
 .../clientpositive/binarysortable_1.q.out       |  Bin 4329 -> 4325 bytes
 .../clientpositive/correlationoptimizer2.q.out  |  220 +-
 .../clientpositive/correlationoptimizer6.q.out  |  232 +-
 ql/src/test/results/clientpositive/count.q.out  |   14 +-
 .../results/clientpositive/ctas_colname.q.out   |   52 +-
 .../test/results/clientpositive/database.q.out  |    2 +-
 .../clientpositive/decimal_precision.q.out      |    4 +-
 .../results/clientpositive/decimal_udf.q.out    |   30 +-
 .../results/clientpositive/distinct_stats.q.out |   14 +-
 .../dynpart_sort_opt_vectorization.q.out        |  105 +-
 .../dynpart_sort_optimization.q.out             |  105 +-
 ...ryption_select_read_only_encrypted_tbl.q.out |    4 +-
 .../clientpositive/explain_logical.q.out        |   78 +-
 .../clientpositive/fetch_aggregation.q.out      |    4 +-
 .../test/results/clientpositive/gby_star.q.out  |   54 +-
 .../test/results/clientpositive/groupby12.q.out |    6 +-
 .../results/clientpositive/groupby5_map.q.out   |    4 +-
 .../clientpositive/groupby5_map_skew.q.out      |    4 +-
 .../results/clientpositive/groupby_cube1.q.out  |   12 +-
 .../groupby_distinct_samekey.q.out              |    6 +-
 .../clientpositive/groupby_grouping_sets2.q.out |   10 +-
 .../clientpositive/groupby_grouping_sets3.q.out |   12 +-
 .../clientpositive/groupby_grouping_sets5.q.out |    8 +-
 .../clientpositive/groupby_grouping_sets6.q.out |    8 +-
 .../clientpositive/groupby_position.q.out       |   36 +-
 .../clientpositive/groupby_resolution.q.out     |   60 +-
 .../clientpositive/groupby_rollup1.q.out        |   12 +-
 .../clientpositive/groupby_sort_10.q.out        |    8 +-
 .../clientpositive/groupby_sort_11.q.out        |   10 +-
 .../results/clientpositive/groupby_sort_8.q.out |   12 +-
 ql/src/test/results/clientpositive/having.q.out |   62 +-
 .../test/results/clientpositive/having2.q.out   |   12 +-
 .../clientpositive/index_auto_mult_tables.q.out |   12 +-
 .../clientpositive/index_auto_self_join.q.out   |   12 +-
 .../clientpositive/index_auto_update.q.out      |    6 +-
 .../index_bitmap_auto_partitioned.q.out         |    6 +-
 .../index_bitmap_compression.q.out              |    6 +-
 .../infer_bucket_sort_dyn_part.q.out            |    4 +-
 .../infer_bucket_sort_map_operators.q.out       |    4 +-
 ql/src/test/results/clientpositive/join18.q.out |   12 +-
 .../clientpositive/join18_multi_distinct.q.out  |   12 +-
 ql/src/test/results/clientpositive/join31.q.out |   36 +-
 .../limit_partition_metadataonly.q.out          |    4 +-
 .../results/clientpositive/limit_pushdown.q.out |   36 +-
 .../test/results/clientpositive/lineage2.q.out  |    2 +-
 .../test/results/clientpositive/lineage3.q.out  |    4 +-
 .../list_bucket_query_multiskew_3.q.out         |    2 +-
 .../clientpositive/mapjoin_mapjoin.q.out        |   32 +-
 .../clientpositive/metadata_only_queries.q.out  |    4 +-
 .../results/clientpositive/metadataonly1.q.out  |  112 +-
 .../results/clientpositive/multiMapJoin2.q.out  |  226 +-
 .../nonblock_op_deduplicate.q.out               |    8 +-
 .../results/clientpositive/nonmr_fetch.q.out    |   14 +-
 .../clientpositive/partition_multilevels.q.out  |    8 +-
 .../test/results/clientpositive/ppd_gby.q.out   |   12 +-
 .../test/results/clientpositive/ppd_gby2.q.out  |   60 +-
 .../clientpositive/ppd_join_filter.q.out        |   98 +-
 .../ql_rewrite_gbtoidx_cbo_1.q.out              |  168 +-
 .../ql_rewrite_gbtoidx_cbo_2.q.out              |   94 +-
 .../reduce_deduplicate_extended.q.out           |   32 +-
 .../clientpositive/selectDistinctStar.q.out     |   44 +-
 .../clientpositive/spark/auto_join18.q.out      |   10 +-
 .../spark/auto_join18_multi_distinct.q.out      |   12 +-
 .../clientpositive/spark/auto_join27.q.out      |   18 +-
 .../clientpositive/spark/auto_join32.q.out      |   53 +-
 .../results/clientpositive/spark/count.q.out    |   14 +-
 .../clientpositive/spark/groupby5_map.q.out     |    4 +-
 .../spark/groupby5_map_skew.q.out               |    4 +-
 .../clientpositive/spark/groupby_cube1.q.out    |   12 +-
 .../clientpositive/spark/groupby_position.q.out |   18 +-
 .../spark/groupby_resolution.q.out              |   60 +-
 .../clientpositive/spark/groupby_rollup1.q.out  |   12 +-
 .../results/clientpositive/spark/having.q.out   |   62 +-
 .../spark/infer_bucket_sort_map_operators.q.out |    4 +-
 .../results/clientpositive/spark/join18.q.out   |   10 +-
 .../spark/join18_multi_distinct.q.out           |   12 +-
 .../results/clientpositive/spark/join31.q.out   |   36 +-
 .../spark/limit_partition_metadataonly.q.out    |    4 +-
 .../clientpositive/spark/limit_pushdown.q.out   |   34 +-
 .../clientpositive/spark/mapjoin_mapjoin.q.out  |   24 +-
 .../spark/metadata_only_queries.q.out           |    4 +-
 .../clientpositive/spark/ppd_join_filter.q.out  |   90 +-
 .../spark/ql_rewrite_gbtoidx_cbo_1.q.out        |  168 +-
 .../clientpositive/spark/stats_only_null.q.out  |    8 +-
 .../clientpositive/spark/subquery_in.q.out      |   36 +-
 .../results/clientpositive/spark/union11.q.out  |   42 +-
 .../results/clientpositive/spark/union14.q.out  |   28 +-
 .../results/clientpositive/spark/union15.q.out  |   28 +-
 .../results/clientpositive/spark/union28.q.out  |    4 +-
 .../results/clientpositive/spark/union30.q.out  |    4 +-
 .../results/clientpositive/spark/union33.q.out  |    8 +-
 .../results/clientpositive/spark/union5.q.out   |   34 +-
 .../results/clientpositive/spark/union7.q.out   |   28 +-
 .../clientpositive/spark/union_remove_21.q.out  |    4 +-
 .../spark/vector_count_distinct.q.out           |    4 +-
 .../spark/vector_decimal_aggregate.q.out        |   12 +-
 .../spark/vector_distinct_2.q.out               |   28 +-
 .../clientpositive/spark/vector_groupby_3.q.out |   30 +-
 .../spark/vector_mapjoin_reduce.q.out           |   36 +-
 .../clientpositive/spark/vector_orderby_5.q.out |    6 +-
 .../clientpositive/spark/vectorization_0.q.out  |   16 +-
 .../clientpositive/spark/vectorization_13.q.out |   32 +-
 .../clientpositive/spark/vectorization_15.q.out |   16 +-
 .../clientpositive/spark/vectorization_16.q.out |   16 +-
 .../clientpositive/spark/vectorization_9.q.out  |   16 +-
 .../spark/vectorization_pushdown.q.out          |    4 +-
 .../spark/vectorization_short_regress.q.out     |   74 +-
 .../spark/vectorized_nested_mapjoin.q.out       |   18 +-
 .../spark/vectorized_timestamp_funcs.q.out      |   12 +-
 .../clientpositive/stats_only_null.q.out        |    8 +-
 .../results/clientpositive/stats_ppr_all.q.out  |   16 +-
 .../subq_where_serialization.q.out              |   18 +-
 .../clientpositive/subquery_exists_having.q.out |   48 +-
 .../results/clientpositive/subquery_in.q.out    |   36 +-
 .../clientpositive/subquery_in_having.q.out     |  260 +-
 .../clientpositive/subquery_notexists.q.out     |   18 +-
 .../subquery_notexists_having.q.out             |   26 +-
 .../results/clientpositive/subquery_notin.q.out |   24 +-
 .../subquery_notin_having.q.java1.7.out         |   50 +-
 .../subquery_unqualcolumnrefs.q.out             |   74 +-
 .../results/clientpositive/subquery_views.q.out |    8 +-
 .../test/results/clientpositive/tez/count.q.out |   14 +-
 .../tez/dynamic_partition_pruning.q.out         |   88 +-
 .../tez/dynpart_sort_opt_vectorization.q.out    |   90 +-
 .../tez/dynpart_sort_optimization.q.out         |   89 +-
 .../clientpositive/tez/explainuser_1.q.out      | 2319 +++++++++---------
 .../clientpositive/tez/explainuser_2.q.out      |  782 +++---
 .../results/clientpositive/tez/having.q.out     |   62 +-
 .../clientpositive/tez/limit_pushdown.q.out     |   34 +-
 .../clientpositive/tez/mapjoin_mapjoin.q.out    |   24 +-
 .../tez/metadata_only_queries.q.out             |    4 +-
 .../clientpositive/tez/metadataonly1.q.out      |   44 +-
 .../test/results/clientpositive/tez/mrr.q.out   |   94 +-
 .../clientpositive/tez/selectDistinctStar.q.out |   44 +-
 .../clientpositive/tez/stats_only_null.q.out    |    8 +-
 .../clientpositive/tez/subquery_in.q.out        |   36 +-
 .../results/clientpositive/tez/tez_dml.q.out    |    6 +-
 .../results/clientpositive/tez/union5.q.out     |   44 +-
 .../results/clientpositive/tez/union7.q.out     |   28 +-
 .../clientpositive/tez/unionDistinct_1.q.out    |    8 +-
 .../clientpositive/tez/vector_aggregate_9.q.out |    4 +-
 .../tez/vector_binary_join_groupby.q.out        |    4 +-
 .../tez/vector_count_distinct.q.out             |    4 +-
 .../tez/vector_decimal_aggregate.q.out          |   12 +-
 .../tez/vector_decimal_precision.q.out          |    4 +-
 .../clientpositive/tez/vector_decimal_udf.q.out |   30 +-
 .../clientpositive/tez/vector_distinct_2.q.out  |   28 +-
 .../clientpositive/tez/vector_groupby_3.q.out   |   30 +-
 .../tez/vector_groupby_reduce.q.out             |    8 +-
 .../tez/vector_grouping_sets.q.out              |    8 +-
 .../tez/vector_mapjoin_reduce.q.out             |   36 +-
 .../clientpositive/tez/vector_orderby_5.q.out   |    6 +-
 .../clientpositive/tez/vector_outer_join2.q.out |   20 +-
 .../tez/vector_partition_diff_num_cols.q.out    |   20 +-
 .../tez/vector_partitioned_date_time.q.out      |   12 +-
 .../tez/vector_reduce_groupby_decimal.q.out     |   24 +-
 .../clientpositive/tez/vectorization_0.q.out    |   16 +-
 .../clientpositive/tez/vectorization_13.q.out   |   32 +-
 .../clientpositive/tez/vectorization_15.q.out   |   16 +-
 .../clientpositive/tez/vectorization_16.q.out   |   16 +-
 .../clientpositive/tez/vectorization_9.q.out    |   16 +-
 .../tez/vectorization_limit.q.out               |   14 +-
 .../tez/vectorization_pushdown.q.out            |    4 +-
 .../tez/vectorization_short_regress.q.out       |   74 +-
 .../tez/vectorized_distinct_gby.q.out           |    8 +-
 .../vectorized_dynamic_partition_pruning.q.out  |   88 +-
 .../tez/vectorized_nested_mapjoin.q.out         |   18 +-
 .../clientpositive/tez/vectorized_parquet.q.out |    6 +-
 .../tez/vectorized_timestamp_funcs.q.out        |   12 +-
 ql/src/test/results/clientpositive/udf8.q.out   |    4 +-
 .../test/results/clientpositive/udf_count.q.out |   16 +-
 .../test/results/clientpositive/union11.q.out   |   70 +-
 .../test/results/clientpositive/union14.q.out   |   32 +-
 .../test/results/clientpositive/union15.q.out   |   38 +-
 .../test/results/clientpositive/union28.q.out   |    8 +-
 .../test/results/clientpositive/union30.q.out   |    8 +-
 .../test/results/clientpositive/union33.q.out   |    8 +-
 ql/src/test/results/clientpositive/union5.q.out |   48 +-
 ql/src/test/results/clientpositive/union7.q.out |   32 +-
 .../clientpositive/unionDistinct_1.q.out        |    8 +-
 .../clientpositive/union_remove_21.q.out        |    8 +-
 .../clientpositive/vector_aggregate_9.q.out     |    4 +-
 .../vector_aggregate_without_gby.q.out          |    4 +-
 .../vector_binary_join_groupby.q.out            |    4 +-
 .../clientpositive/vector_count_distinct.q.out  |    6 +-
 .../vector_decimal_aggregate.q.out              |   12 +-
 .../vector_decimal_precision.q.out              |    4 +-
 .../clientpositive/vector_decimal_udf.q.out     |   30 +-
 .../clientpositive/vector_distinct_2.q.out      |   28 +-
 .../clientpositive/vector_groupby_3.q.out       |   30 +-
 .../clientpositive/vector_groupby_reduce.q.out  |    8 +-
 .../clientpositive/vector_grouping_sets.q.out   |    8 +-
 .../clientpositive/vector_left_outer_join.q.out |    8 +-
 .../clientpositive/vector_mapjoin_reduce.q.out  |   36 +-
 .../clientpositive/vector_orderby_5.q.out       |    6 +-
 .../clientpositive/vector_outer_join1.q.out     |    8 +-
 .../clientpositive/vector_outer_join2.q.out     |   28 +-
 .../clientpositive/vector_outer_join3.q.out     |   24 +-
 .../clientpositive/vector_outer_join4.q.out     |    8 +-
 .../clientpositive/vector_outer_join5.q.out     |   48 +-
 .../vector_partition_diff_num_cols.q.out        |   20 +-
 .../vector_partitioned_date_time.q.out          |   12 +-
 .../vector_reduce_groupby_decimal.q.out         |   24 +-
 .../clientpositive/vectorization_0.q.out        |   16 +-
 .../clientpositive/vectorization_13.q.out       |   32 +-
 .../clientpositive/vectorization_15.q.out       |   16 +-
 .../clientpositive/vectorization_16.q.out       |   16 +-
 .../clientpositive/vectorization_9.q.out        |   16 +-
 .../clientpositive/vectorization_limit.q.out    |   16 +-
 .../clientpositive/vectorization_pushdown.q.out |    4 +-
 .../vectorization_short_regress.q.out           |   74 +-
 .../vectorized_distinct_gby.q.out               |   12 +-
 .../vectorized_nested_mapjoin.q.out             |   26 +-
 .../clientpositive/vectorized_parquet.q.out     |    6 +-
 .../vectorized_parquet_types.q.out              |    6 +-
 .../vectorized_timestamp_funcs.q.out            |   12 +-
 227 files changed, 4818 insertions(+), 5017 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/1cce5f00/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateProjectMergeRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateProjectMergeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateProjectMergeRule.java
new file mode 100644
index 0000000..53f04ee
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateProjectMergeRule.java
@@ -0,0 +1,151 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.AggregateCall;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Planner rule that recognizes a {@link HiveAggregate}
+ * on top of a {@link HiveProject} and, if possible,
+ * aggregates through the project or removes the project.
+ *
+ * <p>This is only possible when the grouping expressions and arguments to
+ * the aggregate functions are field references (i.e. not expressions).
+ *
+ * <p>In some cases, this rule has the effect of trimming: the aggregate will
+ * use fewer columns than the project did.
+ */
+public class HiveAggregateProjectMergeRule extends RelOptRule {
+  public static final HiveAggregateProjectMergeRule INSTANCE =
+      new HiveAggregateProjectMergeRule();
+
+  /** Private constructor. */
+  private HiveAggregateProjectMergeRule() {
+    super(
+        operand(HiveAggregate.class,
+            operand(HiveProject.class, any())));
+  }
+
+  @Override
+  public void onMatch(RelOptRuleCall call) {
+    final HiveAggregate aggregate = call.rel(0);
+    final HiveProject project = call.rel(1);
+    RelNode x = apply(aggregate, project);
+    if (x != null) {
+      call.transformTo(x);
+    }
+  }
+
+  public static RelNode apply(HiveAggregate aggregate,
+      HiveProject project) {
+    final List<Integer> newKeys = Lists.newArrayList();
+    final Map<Integer, Integer> map = new HashMap<>();
+    for (int key : aggregate.getGroupSet()) {
+      final RexNode rex = project.getProjects().get(key);
+      if (rex instanceof RexInputRef) {
+        final int newKey = ((RexInputRef) rex).getIndex();
+        newKeys.add(newKey);
+        map.put(key, newKey);
+      } else {
+        // Cannot handle "GROUP BY expression"
+        return null;
+      }
+    }
+
+    final ImmutableBitSet newGroupSet = aggregate.getGroupSet().permute(map);
+    ImmutableList<ImmutableBitSet> newGroupingSets = null;
+    if (aggregate.indicator) {
+      newGroupingSets =
+          ImmutableBitSet.ORDERING.immutableSortedCopy(
+              ImmutableBitSet.permute(aggregate.getGroupSets(), map));
+    }
+
+    final ImmutableList.Builder<AggregateCall> aggCalls =
+        ImmutableList.builder();
+    for (AggregateCall aggregateCall : aggregate.getAggCallList()) {
+      final ImmutableList.Builder<Integer> newArgs = ImmutableList.builder();
+      for (int arg : aggregateCall.getArgList()) {
+        final RexNode rex = project.getProjects().get(arg);
+        if (rex instanceof RexInputRef) {
+          newArgs.add(((RexInputRef) rex).getIndex());
+        } else {
+          // Cannot handle "AGG(expression)"
+          return null;
+        }
+      }
+      final int newFilterArg;
+      if (aggregateCall.filterArg >= 0) {
+        final RexNode rex = project.getProjects().get(aggregateCall.filterArg);
+        if (!(rex instanceof RexInputRef)) {
+          return null;
+        }
+        newFilterArg = ((RexInputRef) rex).getIndex();
+      } else {
+        newFilterArg = -1;
+      }
+      aggCalls.add(aggregateCall.copy(newArgs.build(), newFilterArg));
+    }
+
+    final Aggregate newAggregate =
+        aggregate.copy(aggregate.getTraitSet(), project.getInput(),
+            aggregate.indicator, newGroupSet, newGroupingSets,
+            aggCalls.build());
+
+    // Add a project if the group set is not in the same order or
+    // contains duplicates.
+    RelNode rel = newAggregate;
+    if (!newKeys.equals(newGroupSet.asList())) {
+      final List<Integer> posList = Lists.newArrayList();
+      for (int newKey : newKeys) {
+        posList.add(newGroupSet.indexOf(newKey));
+      }
+      if (aggregate.indicator) {
+        for (int newKey : newKeys) {
+          posList.add(aggregate.getGroupCount() + newGroupSet.indexOf(newKey));
+        }
+      }
+      for (int i = newAggregate.getGroupCount()
+                   + newAggregate.getIndicatorCount();
+           i < newAggregate.getRowType().getFieldCount(); i++) {
+        posList.add(i);
+      }
+      rel = RelOptUtil.createProject(HiveProject.DEFAULT_PROJECT_FACTORY,
+          rel, posList);
+    }
+
+    return rel;
+  }
+}
+
+// End HiveAggregateProjectMergeRule.java

http://git-wip-us.apache.org/repos/asf/hive/blob/1cce5f00/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
index 4144674..a12fa2a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
@@ -24,11 +24,10 @@ import java.util.List;
 import java.util.Set;
 
 import org.apache.calcite.plan.RelOptUtil;
-import org.apache.calcite.rel.RelCollation;
-import org.apache.calcite.rel.RelFieldCollation;
 import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.AggregateCall;
 import org.apache.calcite.rel.core.RelFactories;
-import org.apache.calcite.rel.metadata.RelMetadataQuery;
 import org.apache.calcite.rel.type.RelDataType;
 import org.apache.calcite.rel.type.RelDataTypeField;
 import org.apache.calcite.rex.RexNode;
@@ -37,20 +36,19 @@ import org.apache.calcite.rex.RexVisitor;
 import org.apache.calcite.sql.validate.SqlValidator;
 import org.apache.calcite.sql2rel.RelFieldTrimmer;
 import org.apache.calcite.util.ImmutableBitSet;
-import org.apache.calcite.util.Util;
 import org.apache.calcite.util.mapping.IntPair;
 import org.apache.calcite.util.mapping.Mapping;
 import org.apache.calcite.util.mapping.MappingType;
 import org.apache.calcite.util.mapping.Mappings;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin;
 
+import com.google.common.base.Function;
 import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Iterables;
 
 public class HiveRelFieldTrimmer extends RelFieldTrimmer {
 
-  public HiveRelFieldTrimmer(SqlValidator validator) {
-    super(validator);
-  }
+  private final RelFactories.AggregateFactory aggregateFactory;
 
   public HiveRelFieldTrimmer(SqlValidator validator,
       RelFactories.ProjectFactory projectFactory,
@@ -62,6 +60,7 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer {
       RelFactories.SetOpFactory setOpFactory) {
     super(validator, projectFactory, filterFactory, joinFactory,
             semiJoinFactory, sortFactory, aggregateFactory, setOpFactory);
+    this.aggregateFactory = aggregateFactory;
   }
 
   /**
@@ -156,27 +155,127 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer {
 
     return new TrimResult(newJoin, mapping);
   }
-
-  protected TrimResult trimChild(
-      RelNode rel,
-      RelNode input,
+  /**
+   * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for
+   * {@link org.apache.calcite.rel.core.Aggregate}.
+   */
+  @Override
+  public TrimResult trimFields(
+      Aggregate aggregate,
       ImmutableBitSet fieldsUsed,
       Set<RelDataTypeField> extraFields) {
-    Util.discard(rel);
-    if (input.getClass().getName().endsWith("MedMdrClassExtentRel")) {
-      // MedMdrJoinRule cannot handle Join of Project of
-      // MedMdrClassExtentRel, only naked MedMdrClassExtentRel.
-      // So, disable trimming.
-      fieldsUsed = ImmutableBitSet.range(input.getRowType().getFieldCount());
+    // Fields:
+    //
+    // | sys fields | group fields | indicator fields | agg functions |
+    //
+    // Two kinds of trimming:
+    //
+    // 1. If agg rel has system fields but none of these are used, create an
+    // agg rel with no system fields.
+    //
+    // 2. If aggregate functions are not used, remove them.
+    //
+    // But group and indicator fields stay, even if they are not used.
+
+    final RelDataType rowType = aggregate.getRowType();
+
+    // Compute which input fields are used.
+    // 1. group fields are always used
+    final ImmutableBitSet.Builder inputFieldsUsed =
+        ImmutableBitSet.builder(aggregate.getGroupSet());
+    // 2. agg functions
+    for (AggregateCall aggCall : aggregate.getAggCallList()) {
+      for (int i : aggCall.getArgList()) {
+        inputFieldsUsed.set(i);
+      }
+      if (aggCall.filterArg >= 0) {
+        inputFieldsUsed.set(aggCall.filterArg);
+      }
+    }
+
+    // Create input with trimmed columns.
+    final RelNode input = aggregate.getInput();
+    final Set<RelDataTypeField> inputExtraFields = Collections.emptySet();
+    final TrimResult trimResult =
+        trimChild(aggregate, input, inputFieldsUsed.build(), inputExtraFields);
+    final RelNode newInput = trimResult.left;
+    final Mapping inputMapping = trimResult.right;
+
+    // We have to return group keys and (if present) indicators.
+    // So, pretend that the consumer asked for them.
+    final int groupCount = aggregate.getGroupSet().cardinality();
+    final int indicatorCount = aggregate.getIndicatorCount();
+    fieldsUsed =
+        fieldsUsed.union(ImmutableBitSet.range(groupCount + indicatorCount));
+
+    // If the input is unchanged, and we need to project all columns,
+    // there's nothing to do.
+    if (input == newInput
+        && fieldsUsed.equals(ImmutableBitSet.range(rowType.getFieldCount()))) {
+      return new TrimResult(
+          aggregate,
+          Mappings.createIdentity(rowType.getFieldCount()));
     }
-    final ImmutableList<RelCollation> collations =
-        RelMetadataQuery.collations(input);
-    for (RelCollation collation : collations) {
-      for (RelFieldCollation fieldCollation : collation.getFieldCollations()) {
-        fieldsUsed = fieldsUsed.set(fieldCollation.getFieldIndex());
+
+    // Which agg calls are used by our consumer?
+    int j = groupCount + indicatorCount;
+    int usedAggCallCount = 0;
+    for (int i = 0; i < aggregate.getAggCallList().size(); i++) {
+      if (fieldsUsed.get(j++)) {
+        ++usedAggCallCount;
       }
     }
-    return dispatchTrimFields(input, fieldsUsed, extraFields);
+
+    // Offset due to the number of system fields having changed.
+    Mapping mapping =
+        Mappings.create(
+            MappingType.INVERSE_SURJECTION,
+            rowType.getFieldCount(),
+            groupCount + indicatorCount + usedAggCallCount);
+
+    final ImmutableBitSet newGroupSet =
+        Mappings.apply(inputMapping, aggregate.getGroupSet());
+
+    final ImmutableList<ImmutableBitSet> newGroupSets =
+        ImmutableList.copyOf(
+            Iterables.transform(aggregate.getGroupSets(),
+                new Function<ImmutableBitSet, ImmutableBitSet>() {
+                  @Override
+                  public ImmutableBitSet apply(ImmutableBitSet input) {
+                    return Mappings.apply(inputMapping, input);
+                  }
+                }));
+
+    // Populate mapping of where to find the fields. System, group key and
+    // indicator fields first.
+    for (j = 0; j < groupCount + indicatorCount; j++) {
+      mapping.set(j, j);
+    }
+
+    // Now create new agg calls, and populate mapping for them.
+    final List<AggregateCall> newAggCallList = new ArrayList<>();
+    j = groupCount + indicatorCount;
+    for (AggregateCall aggCall : aggregate.getAggCallList()) {
+      if (fieldsUsed.get(j)) {
+        AggregateCall newAggCall =
+            aggCall.copy(Mappings.apply2(inputMapping, aggCall.getArgList()),
+                Mappings.apply(inputMapping, aggCall.filterArg));
+        if (newAggCall.equals(aggCall)) {
+          newAggCall = aggCall; // immutable -> canonize to save space
+        }
+        mapping.set(j, groupCount + indicatorCount + newAggCallList.size());
+        newAggCallList.add(newAggCall);
+      }
+      ++j;
+    }
+
+    RelNode newAggregate = aggregateFactory.createAggregate(newInput,
+        aggregate.indicator, newGroupSet, newGroupSets, newAggCallList);
+
+    assert newAggregate.getClass() == aggregate.getClass();
+
+    return new TrimResult(newAggregate, mapping);
   }
 
+
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/1cce5f00/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java
index 67f17c2..16a375c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java
@@ -265,8 +265,8 @@ public class PlanModifierForASTConv {
 
     // TODO: Verify GB having is not a separate filter (if so we shouldn't
     // introduce derived table)
-    if (parent instanceof Filter || parent instanceof Join
-        || parent instanceof SetOp) {
+    if (parent instanceof Filter || parent instanceof Join || parent instanceof SetOp ||
+       (parent instanceof Aggregate && filterNode.getInputs().get(0) instanceof Aggregate)) {
       validParent = false;
     }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/1cce5f00/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index d5c747f..0a7ce3a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -134,6 +134,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateProjectMergeRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveExpandDistinctAggregatesRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterJoinRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterProjectTransposeRule;
@@ -883,6 +884,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
       hepPgmBldr.addRuleInstance(ProjectRemoveRule.INSTANCE);
       hepPgmBldr.addRuleInstance(UnionMergeRule.INSTANCE);
       hepPgmBldr.addRuleInstance(new ProjectMergeRule(false, HiveProject.DEFAULT_PROJECT_FACTORY));
+      hepPgmBldr.addRuleInstance(HiveAggregateProjectMergeRule.INSTANCE);
 
       hepPgm = hepPgmBldr.build();
       HepPlanner hepPlanner = new HepPlanner(hepPgm);

http://git-wip-us.apache.org/repos/asf/hive/blob/1cce5f00/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/alter_partition_coltype.q.out b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
index 9fc3c8d..a42b464 100644
--- a/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
+++ b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
@@ -232,8 +232,8 @@ STAGE PLANS:
               name: default.alter_coltype
             name: default.alter_coltype
       Truncated Path -> Alias:
-        /alter_coltype/dt=100/ts=3.0 [$hdt$_0:alter_coltype]
-        /alter_coltype/dt=100/ts=6.30 [$hdt$_0:alter_coltype]
+        /alter_coltype/dt=100/ts=3.0 [alter_coltype]
+        /alter_coltype/dt=100/ts=6.30 [alter_coltype]
       Needs Tagging: false
       Reduce Operator Tree:
         Group By Operator
@@ -411,7 +411,7 @@ STAGE PLANS:
               name: default.alter_coltype
             name: default.alter_coltype
       Truncated Path -> Alias:
-        /alter_coltype/dt=100/ts=6.30 [$hdt$_0:alter_coltype]
+        /alter_coltype/dt=100/ts=6.30 [alter_coltype]
       Needs Tagging: false
       Reduce Operator Tree:
         Group By Operator
@@ -568,7 +568,7 @@ STAGE PLANS:
               name: default.alter_coltype
             name: default.alter_coltype
       Truncated Path -> Alias:
-        /alter_coltype/dt=100/ts=3.0 [$hdt$_0:alter_coltype]
+        /alter_coltype/dt=100/ts=3.0 [alter_coltype]
       Needs Tagging: false
       Reduce Operator Tree:
         Group By Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/1cce5f00/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out b/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
index 82cc0da..1b9ec68 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
@@ -157,11 +157,11 @@ STAGE PLANS:
             Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: PARTIAL
             Select Operator
               expressions: state (type: string), locid (type: int)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: state, locid
               Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: PARTIAL
               Group By Operator
                 aggregations: count()
-                keys: _col0 (type: string), _col1 (type: int)
+                keys: state (type: string), locid (type: int)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL
@@ -178,22 +178,18 @@ STAGE PLANS:
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2
           Statistics: Num rows: 7 Data size: 658 Basic stats: COMPLETE Column stats: PARTIAL
-          Select Operator
-            expressions: _col0 (type: string), _col2 (type: bigint), _col1 (type: int)
+          Group By Operator
+            aggregations: min(_col1)
+            keys: _col0 (type: string), _col2 (type: bigint)
+            mode: hash
             outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 7 Data size: 658 Basic stats: COMPLETE Column stats: PARTIAL
-            Group By Operator
-              aggregations: min(_col2)
-              keys: _col0 (type: string), _col1 (type: bigint)
-              mode: hash
-              outputColumnNames: _col0, _col1, _col2
-              Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-2
     Map Reduce
@@ -255,10 +251,10 @@ STAGE PLANS:
             Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
               expressions: year (type: int)
-              outputColumnNames: _col0
+              outputColumnNames: year
               Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
               Group By Operator
-                keys: _col0 (type: int)
+                keys: year (type: int)
                 mode: hash
                 outputColumnNames: _col0
                 Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
@@ -308,10 +304,10 @@ STAGE PLANS:
             Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
               expressions: state (type: string), locid (type: int)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: state, locid
               Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
               Group By Operator
-                keys: _col0 (type: string), _col1 (type: int)
+                keys: state (type: string), locid (type: int)
                 mode: hash
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
@@ -361,10 +357,10 @@ STAGE PLANS:
             Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
               expressions: state (type: string), locid (type: int)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: state, locid
               Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
               Group By Operator
-                keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
+                keys: state (type: string), locid (type: int), '0' (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
@@ -415,10 +411,10 @@ STAGE PLANS:
             Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
               expressions: state (type: string), locid (type: int)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: state, locid
               Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
               Group By Operator
-                keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
+                keys: state (type: string), locid (type: int), '0' (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
@@ -469,10 +465,10 @@ STAGE PLANS:
             Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
               expressions: state (type: string), locid (type: int)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: state, locid
               Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
               Group By Operator
-                keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
+                keys: state (type: string), locid (type: int), '0' (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
@@ -523,10 +519,10 @@ STAGE PLANS:
             Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
               expressions: state (type: string), locid (type: int)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: state, locid
               Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
               Group By Operator
-                keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
+                keys: state (type: string), locid (type: int), '0' (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE
@@ -577,10 +573,10 @@ STAGE PLANS:
             Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
               expressions: state (type: string), locid (type: int)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: state, locid
               Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
               Group By Operator
-                keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
+                keys: state (type: string), locid (type: int), '0' (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
@@ -631,10 +627,10 @@ STAGE PLANS:
             Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
               expressions: state (type: string), locid (type: int)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: state, locid
               Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
               Group By Operator
-                keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
+                keys: state (type: string), locid (type: int), '0' (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
@@ -689,10 +685,10 @@ STAGE PLANS:
             Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
               expressions: year (type: int)
-              outputColumnNames: _col0
+              outputColumnNames: year
               Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
               Group By Operator
-                keys: _col0 (type: int)
+                keys: year (type: int)
                 mode: hash
                 outputColumnNames: _col0
                 Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
@@ -742,10 +738,10 @@ STAGE PLANS:
             Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
               expressions: state (type: string), locid (type: int)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: state, locid
               Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
               Group By Operator
-                keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
+                keys: state (type: string), locid (type: int), '0' (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE
@@ -798,10 +794,10 @@ STAGE PLANS:
             Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: PARTIAL
             Select Operator
               expressions: state (type: string), zip (type: bigint)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: state, zip
               Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: PARTIAL
               Group By Operator
-                keys: _col0 (type: string), _col1 (type: bigint)
+                keys: state (type: string), zip (type: bigint)
                 mode: hash
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL
@@ -851,10 +847,10 @@ STAGE PLANS:
             Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: state (type: string), locid (type: int)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: state, locid
               Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
+                keys: state (type: string), locid (type: int), '0' (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE
@@ -905,10 +901,10 @@ STAGE PLANS:
             Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: state (type: string), locid (type: int)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: state, locid
               Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
+                keys: state (type: string), locid (type: int), '0' (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE
@@ -959,10 +955,10 @@ STAGE PLANS:
             Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: state (type: string), locid (type: int)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: state, locid
               Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
+                keys: state (type: string), locid (type: int), '0' (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
@@ -1013,10 +1009,10 @@ STAGE PLANS:
             Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: state (type: string), locid (type: int)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: state, locid
               Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
+                keys: state (type: string), locid (type: int), '0' (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
@@ -1067,10 +1063,10 @@ STAGE PLANS:
             Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: state (type: string), locid (type: int)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: state, locid
               Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
+                keys: state (type: string), locid (type: int), '0' (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE
@@ -1121,10 +1117,10 @@ STAGE PLANS:
             Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: state (type: string), locid (type: int)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: state, locid
               Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
+                keys: state (type: string), locid (type: int), '0' (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE
@@ -1175,10 +1171,10 @@ STAGE PLANS:
             Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: year (type: int)
-              outputColumnNames: _col0
+              outputColumnNames: year
               Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                keys: _col0 (type: int)
+                keys: year (type: int)
                 mode: hash
                 outputColumnNames: _col0
                 Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
@@ -1228,10 +1224,10 @@ STAGE PLANS:
             Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: state (type: string), locid (type: int)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: state, locid
               Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
+                keys: state (type: string), locid (type: int), '0' (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE

http://git-wip-us.apache.org/repos/asf/hive/blob/1cce5f00/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out b/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out
index 2cb1e84..be3fa1d 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out
@@ -91,10 +91,10 @@ STAGE PLANS:
             Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: state (type: string), country (type: string)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: state, country
               Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                keys: _col0 (type: string), _col1 (type: string)
+                keys: state (type: string), country (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
@@ -144,10 +144,10 @@ STAGE PLANS:
             Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: state (type: string), country (type: string)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: state, country
               Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                keys: _col0 (type: string), _col1 (type: string), '0' (type: string)
+                keys: state (type: string), country (type: string), '0' (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 80 Data size: 800 Basic stats: COMPLETE Column stats: NONE
@@ -202,10 +202,10 @@ STAGE PLANS:
             Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
               expressions: state (type: string), country (type: string)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: state, country
               Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
               Group By Operator
-                keys: _col0 (type: string), _col1 (type: string)
+                keys: state (type: string), country (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 8 Data size: 1384 Basic stats: COMPLETE Column stats: COMPLETE
@@ -257,10 +257,10 @@ STAGE PLANS:
             Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: PARTIAL
             Select Operator
               expressions: state (type: string), votes (type: bigint)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: state, votes
               Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: PARTIAL
               Group By Operator
-                keys: _col0 (type: string), _col1 (type: bigint)
+                keys: state (type: string), votes (type: bigint)
                 mode: hash
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL
@@ -310,10 +310,10 @@ STAGE PLANS:
             Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
               expressions: state (type: string), country (type: string)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: state, country
               Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
               Group By Operator
-                keys: _col0 (type: string), _col1 (type: string), '0' (type: string)
+                keys: state (type: string), country (type: string), '0' (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 32 Data size: 8256 Basic stats: COMPLETE Column stats: COMPLETE
@@ -364,10 +364,10 @@ STAGE PLANS:
             Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
               expressions: state (type: string), country (type: string)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: state, country
               Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
               Group By Operator
-                keys: _col0 (type: string), _col1 (type: string)
+                keys: state (type: string), country (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 20 Data size: 3460 Basic stats: COMPLETE Column stats: COMPLETE
@@ -417,10 +417,10 @@ STAGE PLANS:
             Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
               expressions: state (type: string), country (type: string)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: state, country
               Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
               Group By Operator
-                keys: _col0 (type: string), _col1 (type: string), '0' (type: string)
+                keys: state (type: string), country (type: string), '0' (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 80 Data size: 20640 Basic stats: COMPLETE Column stats: COMPLETE

http://git-wip-us.apache.org/repos/asf/hive/blob/1cce5f00/ql/src/test/results/clientpositive/auto_join18.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_join18.q.out b/ql/src/test/results/clientpositive/auto_join18.q.out
index 6dc7a63..7fd7dd1 100644
--- a/ql/src/test/results/clientpositive/auto_join18.q.out
+++ b/ql/src/test/results/clientpositive/auto_join18.q.out
@@ -40,11 +40,11 @@ STAGE PLANS:
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: string), value (type: string)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: key, value
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                aggregations: count(_col1)
-                keys: _col0 (type: string)
+                aggregations: count(value)
+                keys: key (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -140,11 +140,11 @@ STAGE PLANS:
             Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: string), value (type: string)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: key, value
               Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                aggregations: count(DISTINCT _col1)
-                keys: _col0 (type: string), _col1 (type: string)
+                aggregations: count(DISTINCT value)
+                keys: key (type: string), value (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE

http://git-wip-us.apache.org/repos/asf/hive/blob/1cce5f00/ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out b/ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out
index cc17ad1..0a9dd76 100644
--- a/ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out
@@ -42,11 +42,11 @@ STAGE PLANS:
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: string), value (type: string)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: key, value
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                aggregations: count(_col1)
-                keys: _col0 (type: string)
+                aggregations: count(value)
+                keys: key (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -142,11 +142,11 @@ STAGE PLANS:
             Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: string), value (type: string)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: key, value
               Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                aggregations: count(DISTINCT _col1), count(DISTINCT _col0)
-                keys: _col0 (type: string), _col1 (type: string)
+                aggregations: count(DISTINCT value), count(DISTINCT key)
+                keys: key (type: string), value (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3
                 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE

http://git-wip-us.apache.org/repos/asf/hive/blob/1cce5f00/ql/src/test/results/clientpositive/auto_join27.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_join27.q.out b/ql/src/test/results/clientpositive/auto_join27.q.out
index 16a7f02..9c03c78 100644
--- a/ql/src/test/results/clientpositive/auto_join27.q.out
+++ b/ql/src/test/results/clientpositive/auto_join27.q.out
@@ -39,20 +39,16 @@ STAGE PLANS:
             Filter Operator
               predicate: (UDFToDouble(key) < 200.0) (type: boolean)
               Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: string), value (type: string)
+              Group By Operator
+                keys: key (type: string), value (type: string)
+                mode: hash
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: string), _col1 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
                   Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string), _col1 (type: string)
-                    sort order: ++
-                    Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Group By Operator
           keys: KEY._col0 (type: string), KEY._col1 (type: string)

http://git-wip-us.apache.org/repos/asf/hive/blob/1cce5f00/ql/src/test/results/clientpositive/auto_join32.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_join32.q.out b/ql/src/test/results/clientpositive/auto_join32.q.out
index f862870..161ab6b 100644
--- a/ql/src/test/results/clientpositive/auto_join32.q.out
+++ b/ql/src/test/results/clientpositive/auto_join32.q.out
@@ -411,10 +411,10 @@ STAGE PLANS:
                   outputColumnNames: _col1, _col3
                   Select Operator
                     expressions: _col3 (type: string), _col1 (type: string)
-                    outputColumnNames: _col0, _col1
+                    outputColumnNames: _col3, _col1
                     Group By Operator
                       aggregations: count(DISTINCT _col1)
-                      keys: _col0 (type: string), _col1 (type: string)
+                      keys: _col3 (type: string), _col1 (type: string)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2
                       Reduce Output Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/1cce5f00/ql/src/test/results/clientpositive/binarysortable_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/binarysortable_1.q.out b/ql/src/test/results/clientpositive/binarysortable_1.q.out
index 421fd2e..9ef9221 100644
Binary files a/ql/src/test/results/clientpositive/binarysortable_1.q.out and b/ql/src/test/results/clientpositive/binarysortable_1.q.out differ

http://git-wip-us.apache.org/repos/asf/hive/blob/1cce5f00/ql/src/test/results/clientpositive/correlationoptimizer2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/correlationoptimizer2.q.out b/ql/src/test/results/clientpositive/correlationoptimizer2.q.out
index c1a20c8..96c7660 100644
--- a/ql/src/test/results/clientpositive/correlationoptimizer2.q.out
+++ b/ql/src/test/results/clientpositive/correlationoptimizer2.q.out
@@ -41,22 +41,18 @@ STAGE PLANS:
             Filter Operator
               predicate: key is not null (type: boolean)
               Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: string), value (type: string)
+              Group By Operator
+                aggregations: count(value)
+                keys: key (type: string)
+                mode: hash
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count(_col1)
-                  keys: _col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
                   Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint)
+                  value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(VALUE._col0)
@@ -144,22 +140,18 @@ STAGE PLANS:
             Filter Operator
               predicate: key is not null (type: boolean)
               Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: string), value (type: string)
+              Group By Operator
+                aggregations: count(value)
+                keys: key (type: string)
+                mode: hash
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count(_col1)
-                  keys: _col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
                   Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint)
+                  value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(VALUE._col0)
@@ -228,44 +220,36 @@ STAGE PLANS:
             Filter Operator
               predicate: key is not null (type: boolean)
               Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: string), value (type: string)
+              Group By Operator
+                aggregations: count(value)
+                keys: key (type: string)
+                mode: hash
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count(_col1)
-                  keys: _col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
                   Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint)
+                  value expressions: _col1 (type: bigint)
           TableScan
             alias: y
             Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: key is not null (type: boolean)
               Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: string), value (type: string)
+              Group By Operator
+                aggregations: count(value)
+                keys: key (type: string)
+                mode: hash
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count(_col1)
-                  keys: _col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
                   Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint)
+                  value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
         Demux Operator
           Statistics: Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE
@@ -411,11 +395,11 @@ STAGE PLANS:
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: string), value (type: string)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: key, value
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                aggregations: count(_col1)
-                keys: _col0 (type: string)
+                aggregations: count(value)
+                keys: key (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -511,11 +495,11 @@ STAGE PLANS:
             Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: string), value (type: string)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: key, value
               Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                aggregations: count(_col1)
-                keys: _col0 (type: string)
+                aggregations: count(value)
+                keys: key (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
@@ -592,11 +576,11 @@ STAGE PLANS:
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: string), value (type: string)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: key, value
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                aggregations: count(_col1)
-                keys: _col0 (type: string)
+                aggregations: count(value)
+                keys: key (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -611,11 +595,11 @@ STAGE PLANS:
             Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: string), value (type: string)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: key, value
               Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                aggregations: count(_col1)
-                keys: _col0 (type: string)
+                aggregations: count(value)
+                keys: key (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
@@ -770,11 +754,11 @@ STAGE PLANS:
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: string), value (type: string)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: key, value
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                aggregations: count(_col1)
-                keys: _col0 (type: string)
+                aggregations: count(value)
+                keys: key (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -870,11 +854,11 @@ STAGE PLANS:
             Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: string), value (type: string)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: key, value
               Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                aggregations: count(_col1)
-                keys: _col0 (type: string)
+                aggregations: count(value)
+                keys: key (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
@@ -951,11 +935,11 @@ STAGE PLANS:
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: string), value (type: string)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: key, value
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                aggregations: count(_col1)
-                keys: _col0 (type: string)
+                aggregations: count(value)
+                keys: key (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -970,11 +954,11 @@ STAGE PLANS:
             Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: string), value (type: string)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: key, value
               Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                aggregations: count(_col1)
-                keys: _col0 (type: string)
+                aggregations: count(value)
+                keys: key (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
@@ -1129,11 +1113,11 @@ STAGE PLANS:
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: string), value (type: string)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: key, value
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                aggregations: count(_col1)
-                keys: _col0 (type: string)
+                aggregations: count(value)
+                keys: key (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -1229,11 +1213,11 @@ STAGE PLANS:
             Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: string), value (type: string)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: key, value
               Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                aggregations: count(_col1)
-                keys: _col0 (type: string)
+                aggregations: count(value)
+                keys: key (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
@@ -1310,11 +1294,11 @@ STAGE PLANS:
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: string), value (type: string)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: key, value
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                aggregations: count(_col1)
-                keys: _col0 (type: string)
+                aggregations: count(value)
+                keys: key (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -1329,11 +1313,11 @@ STAGE PLANS:
             Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: string), value (type: string)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: key, value
               Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                aggregations: count(_col1)
-                keys: _col0 (type: string)
+                aggregations: count(value)
+                keys: key (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
@@ -1489,10 +1473,10 @@ STAGE PLANS:
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: string)
-              outputColumnNames: _col0
+              outputColumnNames: key
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                keys: _col0 (type: string)
+                keys: key (type: string)
                 mode: hash
                 outputColumnNames: _col0
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -1614,10 +1598,10 @@ STAGE PLANS:
             Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: string)
-              outputColumnNames: _col0
+              outputColumnNames: key
               Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                keys: _col0 (type: string)
+                keys: key (type: string)
                 mode: hash
                 outputColumnNames: _col0
                 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
@@ -1703,10 +1687,10 @@ STAGE PLANS:
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: string)
-              outputColumnNames: _col0
+              outputColumnNames: key
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                keys: _col0 (type: string)
+                keys: key (type: string)
                 mode: hash
                 outputColumnNames: _col0
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -1720,10 +1704,10 @@ STAGE PLANS:
             Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: string)
-              outputColumnNames: _col0
+              outputColumnNames: key
               Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                keys: _col0 (type: string)
+                keys: key (type: string)
                 mode: hash
                 outputColumnNames: _col0
                 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
@@ -2029,22 +2013,18 @@ STAGE PLANS:
             Filter Operator
               predicate: key is not null (type: boolean)
               Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: string), value (type: string)
+              Group By Operator
+                aggregations: count(value)
+                keys: key (type: string)
+                mode: hash
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count(_col1)
-                  keys: _col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
                   Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint)
+                  value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(VALUE._col0)
@@ -2144,22 +2124,18 @@ STAGE PLANS:
             Filter Operator
               predicate: key is not null (type: boolean)
               Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: string), value (type: string)
+              Group By Operator
+                aggregations: count(value)
+                keys: key (type: string)
+                mode: hash
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count(_col1)
-                  keys: _col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
                   Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint)
+                  value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
         Demux Operator
           Statistics: Num rows: 276 Data size: 2854 Basic stats: COMPLETE Column stats: NONE