You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2015/09/16 18:46:11 UTC
[14/14] hive git commit: HIVE-11678 : Add AggregateProjectMergeRule
(Ashutosh Chauhan via Jesus Camacho Rodriguez)
HIVE-11678 : Add AggregateProjectMergeRule (Ashutosh Chauhan via Jesus Camacho Rodriguez)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1cce5f00
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1cce5f00
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1cce5f00
Branch: refs/heads/master
Commit: 1cce5f006c595e67a4169851ceb352646759bc27
Parents: 201b1a0
Author: Ashutosh Chauhan <ha...@apache.org>
Authored: Wed Sep 16 09:41:25 2015 -0700
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Wed Sep 16 09:41:25 2015 -0700
----------------------------------------------------------------------
.../rules/HiveAggregateProjectMergeRule.java | 151 ++
.../calcite/rules/HiveRelFieldTrimmer.java | 145 +-
.../translator/PlanModifierForASTConv.java | 4 +-
.../hadoop/hive/ql/parse/CalcitePlanner.java | 2 +
.../alter_partition_coltype.q.out | 8 +-
.../clientpositive/annotate_stats_groupby.q.out | 106 +-
.../annotate_stats_groupby2.q.out | 28 +-
.../results/clientpositive/auto_join18.q.out | 12 +-
.../auto_join18_multi_distinct.q.out | 12 +-
.../results/clientpositive/auto_join27.q.out | 18 +-
.../results/clientpositive/auto_join32.q.out | 4 +-
.../clientpositive/binarysortable_1.q.out | Bin 4329 -> 4325 bytes
.../clientpositive/correlationoptimizer2.q.out | 220 +-
.../clientpositive/correlationoptimizer6.q.out | 232 +-
ql/src/test/results/clientpositive/count.q.out | 14 +-
.../results/clientpositive/ctas_colname.q.out | 52 +-
.../test/results/clientpositive/database.q.out | 2 +-
.../clientpositive/decimal_precision.q.out | 4 +-
.../results/clientpositive/decimal_udf.q.out | 30 +-
.../results/clientpositive/distinct_stats.q.out | 14 +-
.../dynpart_sort_opt_vectorization.q.out | 105 +-
.../dynpart_sort_optimization.q.out | 105 +-
...ryption_select_read_only_encrypted_tbl.q.out | 4 +-
.../clientpositive/explain_logical.q.out | 78 +-
.../clientpositive/fetch_aggregation.q.out | 4 +-
.../test/results/clientpositive/gby_star.q.out | 54 +-
.../test/results/clientpositive/groupby12.q.out | 6 +-
.../results/clientpositive/groupby5_map.q.out | 4 +-
.../clientpositive/groupby5_map_skew.q.out | 4 +-
.../results/clientpositive/groupby_cube1.q.out | 12 +-
.../groupby_distinct_samekey.q.out | 6 +-
.../clientpositive/groupby_grouping_sets2.q.out | 10 +-
.../clientpositive/groupby_grouping_sets3.q.out | 12 +-
.../clientpositive/groupby_grouping_sets5.q.out | 8 +-
.../clientpositive/groupby_grouping_sets6.q.out | 8 +-
.../clientpositive/groupby_position.q.out | 36 +-
.../clientpositive/groupby_resolution.q.out | 60 +-
.../clientpositive/groupby_rollup1.q.out | 12 +-
.../clientpositive/groupby_sort_10.q.out | 8 +-
.../clientpositive/groupby_sort_11.q.out | 10 +-
.../results/clientpositive/groupby_sort_8.q.out | 12 +-
ql/src/test/results/clientpositive/having.q.out | 62 +-
.../test/results/clientpositive/having2.q.out | 12 +-
.../clientpositive/index_auto_mult_tables.q.out | 12 +-
.../clientpositive/index_auto_self_join.q.out | 12 +-
.../clientpositive/index_auto_update.q.out | 6 +-
.../index_bitmap_auto_partitioned.q.out | 6 +-
.../index_bitmap_compression.q.out | 6 +-
.../infer_bucket_sort_dyn_part.q.out | 4 +-
.../infer_bucket_sort_map_operators.q.out | 4 +-
ql/src/test/results/clientpositive/join18.q.out | 12 +-
.../clientpositive/join18_multi_distinct.q.out | 12 +-
ql/src/test/results/clientpositive/join31.q.out | 36 +-
.../limit_partition_metadataonly.q.out | 4 +-
.../results/clientpositive/limit_pushdown.q.out | 36 +-
.../test/results/clientpositive/lineage2.q.out | 2 +-
.../test/results/clientpositive/lineage3.q.out | 4 +-
.../list_bucket_query_multiskew_3.q.out | 2 +-
.../clientpositive/mapjoin_mapjoin.q.out | 32 +-
.../clientpositive/metadata_only_queries.q.out | 4 +-
.../results/clientpositive/metadataonly1.q.out | 112 +-
.../results/clientpositive/multiMapJoin2.q.out | 226 +-
.../nonblock_op_deduplicate.q.out | 8 +-
.../results/clientpositive/nonmr_fetch.q.out | 14 +-
.../clientpositive/partition_multilevels.q.out | 8 +-
.../test/results/clientpositive/ppd_gby.q.out | 12 +-
.../test/results/clientpositive/ppd_gby2.q.out | 60 +-
.../clientpositive/ppd_join_filter.q.out | 98 +-
.../ql_rewrite_gbtoidx_cbo_1.q.out | 168 +-
.../ql_rewrite_gbtoidx_cbo_2.q.out | 94 +-
.../reduce_deduplicate_extended.q.out | 32 +-
.../clientpositive/selectDistinctStar.q.out | 44 +-
.../clientpositive/spark/auto_join18.q.out | 10 +-
.../spark/auto_join18_multi_distinct.q.out | 12 +-
.../clientpositive/spark/auto_join27.q.out | 18 +-
.../clientpositive/spark/auto_join32.q.out | 53 +-
.../results/clientpositive/spark/count.q.out | 14 +-
.../clientpositive/spark/groupby5_map.q.out | 4 +-
.../spark/groupby5_map_skew.q.out | 4 +-
.../clientpositive/spark/groupby_cube1.q.out | 12 +-
.../clientpositive/spark/groupby_position.q.out | 18 +-
.../spark/groupby_resolution.q.out | 60 +-
.../clientpositive/spark/groupby_rollup1.q.out | 12 +-
.../results/clientpositive/spark/having.q.out | 62 +-
.../spark/infer_bucket_sort_map_operators.q.out | 4 +-
.../results/clientpositive/spark/join18.q.out | 10 +-
.../spark/join18_multi_distinct.q.out | 12 +-
.../results/clientpositive/spark/join31.q.out | 36 +-
.../spark/limit_partition_metadataonly.q.out | 4 +-
.../clientpositive/spark/limit_pushdown.q.out | 34 +-
.../clientpositive/spark/mapjoin_mapjoin.q.out | 24 +-
.../spark/metadata_only_queries.q.out | 4 +-
.../clientpositive/spark/ppd_join_filter.q.out | 90 +-
.../spark/ql_rewrite_gbtoidx_cbo_1.q.out | 168 +-
.../clientpositive/spark/stats_only_null.q.out | 8 +-
.../clientpositive/spark/subquery_in.q.out | 36 +-
.../results/clientpositive/spark/union11.q.out | 42 +-
.../results/clientpositive/spark/union14.q.out | 28 +-
.../results/clientpositive/spark/union15.q.out | 28 +-
.../results/clientpositive/spark/union28.q.out | 4 +-
.../results/clientpositive/spark/union30.q.out | 4 +-
.../results/clientpositive/spark/union33.q.out | 8 +-
.../results/clientpositive/spark/union5.q.out | 34 +-
.../results/clientpositive/spark/union7.q.out | 28 +-
.../clientpositive/spark/union_remove_21.q.out | 4 +-
.../spark/vector_count_distinct.q.out | 4 +-
.../spark/vector_decimal_aggregate.q.out | 12 +-
.../spark/vector_distinct_2.q.out | 28 +-
.../clientpositive/spark/vector_groupby_3.q.out | 30 +-
.../spark/vector_mapjoin_reduce.q.out | 36 +-
.../clientpositive/spark/vector_orderby_5.q.out | 6 +-
.../clientpositive/spark/vectorization_0.q.out | 16 +-
.../clientpositive/spark/vectorization_13.q.out | 32 +-
.../clientpositive/spark/vectorization_15.q.out | 16 +-
.../clientpositive/spark/vectorization_16.q.out | 16 +-
.../clientpositive/spark/vectorization_9.q.out | 16 +-
.../spark/vectorization_pushdown.q.out | 4 +-
.../spark/vectorization_short_regress.q.out | 74 +-
.../spark/vectorized_nested_mapjoin.q.out | 18 +-
.../spark/vectorized_timestamp_funcs.q.out | 12 +-
.../clientpositive/stats_only_null.q.out | 8 +-
.../results/clientpositive/stats_ppr_all.q.out | 16 +-
.../subq_where_serialization.q.out | 18 +-
.../clientpositive/subquery_exists_having.q.out | 48 +-
.../results/clientpositive/subquery_in.q.out | 36 +-
.../clientpositive/subquery_in_having.q.out | 260 +-
.../clientpositive/subquery_notexists.q.out | 18 +-
.../subquery_notexists_having.q.out | 26 +-
.../results/clientpositive/subquery_notin.q.out | 24 +-
.../subquery_notin_having.q.java1.7.out | 50 +-
.../subquery_unqualcolumnrefs.q.out | 74 +-
.../results/clientpositive/subquery_views.q.out | 8 +-
.../test/results/clientpositive/tez/count.q.out | 14 +-
.../tez/dynamic_partition_pruning.q.out | 88 +-
.../tez/dynpart_sort_opt_vectorization.q.out | 90 +-
.../tez/dynpart_sort_optimization.q.out | 89 +-
.../clientpositive/tez/explainuser_1.q.out | 2319 +++++++++---------
.../clientpositive/tez/explainuser_2.q.out | 782 +++---
.../results/clientpositive/tez/having.q.out | 62 +-
.../clientpositive/tez/limit_pushdown.q.out | 34 +-
.../clientpositive/tez/mapjoin_mapjoin.q.out | 24 +-
.../tez/metadata_only_queries.q.out | 4 +-
.../clientpositive/tez/metadataonly1.q.out | 44 +-
.../test/results/clientpositive/tez/mrr.q.out | 94 +-
.../clientpositive/tez/selectDistinctStar.q.out | 44 +-
.../clientpositive/tez/stats_only_null.q.out | 8 +-
.../clientpositive/tez/subquery_in.q.out | 36 +-
.../results/clientpositive/tez/tez_dml.q.out | 6 +-
.../results/clientpositive/tez/union5.q.out | 44 +-
.../results/clientpositive/tez/union7.q.out | 28 +-
.../clientpositive/tez/unionDistinct_1.q.out | 8 +-
.../clientpositive/tez/vector_aggregate_9.q.out | 4 +-
.../tez/vector_binary_join_groupby.q.out | 4 +-
.../tez/vector_count_distinct.q.out | 4 +-
.../tez/vector_decimal_aggregate.q.out | 12 +-
.../tez/vector_decimal_precision.q.out | 4 +-
.../clientpositive/tez/vector_decimal_udf.q.out | 30 +-
.../clientpositive/tez/vector_distinct_2.q.out | 28 +-
.../clientpositive/tez/vector_groupby_3.q.out | 30 +-
.../tez/vector_groupby_reduce.q.out | 8 +-
.../tez/vector_grouping_sets.q.out | 8 +-
.../tez/vector_mapjoin_reduce.q.out | 36 +-
.../clientpositive/tez/vector_orderby_5.q.out | 6 +-
.../clientpositive/tez/vector_outer_join2.q.out | 20 +-
.../tez/vector_partition_diff_num_cols.q.out | 20 +-
.../tez/vector_partitioned_date_time.q.out | 12 +-
.../tez/vector_reduce_groupby_decimal.q.out | 24 +-
.../clientpositive/tez/vectorization_0.q.out | 16 +-
.../clientpositive/tez/vectorization_13.q.out | 32 +-
.../clientpositive/tez/vectorization_15.q.out | 16 +-
.../clientpositive/tez/vectorization_16.q.out | 16 +-
.../clientpositive/tez/vectorization_9.q.out | 16 +-
.../tez/vectorization_limit.q.out | 14 +-
.../tez/vectorization_pushdown.q.out | 4 +-
.../tez/vectorization_short_regress.q.out | 74 +-
.../tez/vectorized_distinct_gby.q.out | 8 +-
.../vectorized_dynamic_partition_pruning.q.out | 88 +-
.../tez/vectorized_nested_mapjoin.q.out | 18 +-
.../clientpositive/tez/vectorized_parquet.q.out | 6 +-
.../tez/vectorized_timestamp_funcs.q.out | 12 +-
ql/src/test/results/clientpositive/udf8.q.out | 4 +-
.../test/results/clientpositive/udf_count.q.out | 16 +-
.../test/results/clientpositive/union11.q.out | 70 +-
.../test/results/clientpositive/union14.q.out | 32 +-
.../test/results/clientpositive/union15.q.out | 38 +-
.../test/results/clientpositive/union28.q.out | 8 +-
.../test/results/clientpositive/union30.q.out | 8 +-
.../test/results/clientpositive/union33.q.out | 8 +-
ql/src/test/results/clientpositive/union5.q.out | 48 +-
ql/src/test/results/clientpositive/union7.q.out | 32 +-
.../clientpositive/unionDistinct_1.q.out | 8 +-
.../clientpositive/union_remove_21.q.out | 8 +-
.../clientpositive/vector_aggregate_9.q.out | 4 +-
.../vector_aggregate_without_gby.q.out | 4 +-
.../vector_binary_join_groupby.q.out | 4 +-
.../clientpositive/vector_count_distinct.q.out | 6 +-
.../vector_decimal_aggregate.q.out | 12 +-
.../vector_decimal_precision.q.out | 4 +-
.../clientpositive/vector_decimal_udf.q.out | 30 +-
.../clientpositive/vector_distinct_2.q.out | 28 +-
.../clientpositive/vector_groupby_3.q.out | 30 +-
.../clientpositive/vector_groupby_reduce.q.out | 8 +-
.../clientpositive/vector_grouping_sets.q.out | 8 +-
.../clientpositive/vector_left_outer_join.q.out | 8 +-
.../clientpositive/vector_mapjoin_reduce.q.out | 36 +-
.../clientpositive/vector_orderby_5.q.out | 6 +-
.../clientpositive/vector_outer_join1.q.out | 8 +-
.../clientpositive/vector_outer_join2.q.out | 28 +-
.../clientpositive/vector_outer_join3.q.out | 24 +-
.../clientpositive/vector_outer_join4.q.out | 8 +-
.../clientpositive/vector_outer_join5.q.out | 48 +-
.../vector_partition_diff_num_cols.q.out | 20 +-
.../vector_partitioned_date_time.q.out | 12 +-
.../vector_reduce_groupby_decimal.q.out | 24 +-
.../clientpositive/vectorization_0.q.out | 16 +-
.../clientpositive/vectorization_13.q.out | 32 +-
.../clientpositive/vectorization_15.q.out | 16 +-
.../clientpositive/vectorization_16.q.out | 16 +-
.../clientpositive/vectorization_9.q.out | 16 +-
.../clientpositive/vectorization_limit.q.out | 16 +-
.../clientpositive/vectorization_pushdown.q.out | 4 +-
.../vectorization_short_regress.q.out | 74 +-
.../vectorized_distinct_gby.q.out | 12 +-
.../vectorized_nested_mapjoin.q.out | 26 +-
.../clientpositive/vectorized_parquet.q.out | 6 +-
.../vectorized_parquet_types.q.out | 6 +-
.../vectorized_timestamp_funcs.q.out | 12 +-
227 files changed, 4818 insertions(+), 5017 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/1cce5f00/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateProjectMergeRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateProjectMergeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateProjectMergeRule.java
new file mode 100644
index 0000000..53f04ee
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateProjectMergeRule.java
@@ -0,0 +1,151 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.AggregateCall;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Planner rule that recognizes a {@link HiveAggregate}
+ * on top of a {@link HiveProject} and, if possible,
+ * aggregates through the project or removes the project.
+ *
+ * <p>This is only possible when the grouping expressions and arguments to
+ * the aggregate functions are field references (i.e. not expressions).
+ *
+ * <p>In some cases, this rule has the effect of trimming: the aggregate will
+ * use fewer columns than the project did.
+ */
+public class HiveAggregateProjectMergeRule extends RelOptRule {
+ public static final HiveAggregateProjectMergeRule INSTANCE =
+ new HiveAggregateProjectMergeRule();
+
+ /** Private constructor. */
+ private HiveAggregateProjectMergeRule() {
+ super(
+ operand(HiveAggregate.class,
+ operand(HiveProject.class, any())));
+ }
+
+ @Override
+ public void onMatch(RelOptRuleCall call) {
+ final HiveAggregate aggregate = call.rel(0);
+ final HiveProject project = call.rel(1);
+ RelNode x = apply(aggregate, project);
+ if (x != null) {
+ call.transformTo(x);
+ }
+ }
+
+ public static RelNode apply(HiveAggregate aggregate,
+ HiveProject project) {
+ final List<Integer> newKeys = Lists.newArrayList();
+ final Map<Integer, Integer> map = new HashMap<>();
+ for (int key : aggregate.getGroupSet()) {
+ final RexNode rex = project.getProjects().get(key);
+ if (rex instanceof RexInputRef) {
+ final int newKey = ((RexInputRef) rex).getIndex();
+ newKeys.add(newKey);
+ map.put(key, newKey);
+ } else {
+ // Cannot handle "GROUP BY expression"
+ return null;
+ }
+ }
+
+ final ImmutableBitSet newGroupSet = aggregate.getGroupSet().permute(map);
+ ImmutableList<ImmutableBitSet> newGroupingSets = null;
+ if (aggregate.indicator) {
+ newGroupingSets =
+ ImmutableBitSet.ORDERING.immutableSortedCopy(
+ ImmutableBitSet.permute(aggregate.getGroupSets(), map));
+ }
+
+ final ImmutableList.Builder<AggregateCall> aggCalls =
+ ImmutableList.builder();
+ for (AggregateCall aggregateCall : aggregate.getAggCallList()) {
+ final ImmutableList.Builder<Integer> newArgs = ImmutableList.builder();
+ for (int arg : aggregateCall.getArgList()) {
+ final RexNode rex = project.getProjects().get(arg);
+ if (rex instanceof RexInputRef) {
+ newArgs.add(((RexInputRef) rex).getIndex());
+ } else {
+ // Cannot handle "AGG(expression)"
+ return null;
+ }
+ }
+ final int newFilterArg;
+ if (aggregateCall.filterArg >= 0) {
+ final RexNode rex = project.getProjects().get(aggregateCall.filterArg);
+ if (!(rex instanceof RexInputRef)) {
+ return null;
+ }
+ newFilterArg = ((RexInputRef) rex).getIndex();
+ } else {
+ newFilterArg = -1;
+ }
+ aggCalls.add(aggregateCall.copy(newArgs.build(), newFilterArg));
+ }
+
+ final Aggregate newAggregate =
+ aggregate.copy(aggregate.getTraitSet(), project.getInput(),
+ aggregate.indicator, newGroupSet, newGroupingSets,
+ aggCalls.build());
+
+ // Add a project if the group set is not in the same order or
+ // contains duplicates.
+ RelNode rel = newAggregate;
+ if (!newKeys.equals(newGroupSet.asList())) {
+ final List<Integer> posList = Lists.newArrayList();
+ for (int newKey : newKeys) {
+ posList.add(newGroupSet.indexOf(newKey));
+ }
+ if (aggregate.indicator) {
+ for (int newKey : newKeys) {
+ posList.add(aggregate.getGroupCount() + newGroupSet.indexOf(newKey));
+ }
+ }
+ for (int i = newAggregate.getGroupCount()
+ + newAggregate.getIndicatorCount();
+ i < newAggregate.getRowType().getFieldCount(); i++) {
+ posList.add(i);
+ }
+ rel = RelOptUtil.createProject(HiveProject.DEFAULT_PROJECT_FACTORY,
+ rel, posList);
+ }
+
+ return rel;
+ }
+}
+
+// End HiveAggregateProjectMergeRule.java
http://git-wip-us.apache.org/repos/asf/hive/blob/1cce5f00/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
index 4144674..a12fa2a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
@@ -24,11 +24,10 @@ import java.util.List;
import java.util.Set;
import org.apache.calcite.plan.RelOptUtil;
-import org.apache.calcite.rel.RelCollation;
-import org.apache.calcite.rel.RelFieldCollation;
import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.AggregateCall;
import org.apache.calcite.rel.core.RelFactories;
-import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeField;
import org.apache.calcite.rex.RexNode;
@@ -37,20 +36,19 @@ import org.apache.calcite.rex.RexVisitor;
import org.apache.calcite.sql.validate.SqlValidator;
import org.apache.calcite.sql2rel.RelFieldTrimmer;
import org.apache.calcite.util.ImmutableBitSet;
-import org.apache.calcite.util.Util;
import org.apache.calcite.util.mapping.IntPair;
import org.apache.calcite.util.mapping.Mapping;
import org.apache.calcite.util.mapping.MappingType;
import org.apache.calcite.util.mapping.Mappings;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin;
+import com.google.common.base.Function;
import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Iterables;
public class HiveRelFieldTrimmer extends RelFieldTrimmer {
- public HiveRelFieldTrimmer(SqlValidator validator) {
- super(validator);
- }
+ private final RelFactories.AggregateFactory aggregateFactory;
public HiveRelFieldTrimmer(SqlValidator validator,
RelFactories.ProjectFactory projectFactory,
@@ -62,6 +60,7 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer {
RelFactories.SetOpFactory setOpFactory) {
super(validator, projectFactory, filterFactory, joinFactory,
semiJoinFactory, sortFactory, aggregateFactory, setOpFactory);
+ this.aggregateFactory = aggregateFactory;
}
/**
@@ -156,27 +155,127 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer {
return new TrimResult(newJoin, mapping);
}
-
- protected TrimResult trimChild(
- RelNode rel,
- RelNode input,
+ /**
+ * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for
+ * {@link org.apache.calcite.rel.core.Aggregate}.
+ */
+ @Override
+ public TrimResult trimFields(
+ Aggregate aggregate,
ImmutableBitSet fieldsUsed,
Set<RelDataTypeField> extraFields) {
- Util.discard(rel);
- if (input.getClass().getName().endsWith("MedMdrClassExtentRel")) {
- // MedMdrJoinRule cannot handle Join of Project of
- // MedMdrClassExtentRel, only naked MedMdrClassExtentRel.
- // So, disable trimming.
- fieldsUsed = ImmutableBitSet.range(input.getRowType().getFieldCount());
+ // Fields:
+ //
+ // | sys fields | group fields | indicator fields | agg functions |
+ //
+ // Two kinds of trimming:
+ //
+ // 1. If agg rel has system fields but none of these are used, create an
+ // agg rel with no system fields.
+ //
+ // 2. If aggregate functions are not used, remove them.
+ //
+ // But group and indicator fields stay, even if they are not used.
+
+ final RelDataType rowType = aggregate.getRowType();
+
+ // Compute which input fields are used.
+ // 1. group fields are always used
+ final ImmutableBitSet.Builder inputFieldsUsed =
+ ImmutableBitSet.builder(aggregate.getGroupSet());
+ // 2. agg functions
+ for (AggregateCall aggCall : aggregate.getAggCallList()) {
+ for (int i : aggCall.getArgList()) {
+ inputFieldsUsed.set(i);
+ }
+ if (aggCall.filterArg >= 0) {
+ inputFieldsUsed.set(aggCall.filterArg);
+ }
+ }
+
+ // Create input with trimmed columns.
+ final RelNode input = aggregate.getInput();
+ final Set<RelDataTypeField> inputExtraFields = Collections.emptySet();
+ final TrimResult trimResult =
+ trimChild(aggregate, input, inputFieldsUsed.build(), inputExtraFields);
+ final RelNode newInput = trimResult.left;
+ final Mapping inputMapping = trimResult.right;
+
+ // We have to return group keys and (if present) indicators.
+ // So, pretend that the consumer asked for them.
+ final int groupCount = aggregate.getGroupSet().cardinality();
+ final int indicatorCount = aggregate.getIndicatorCount();
+ fieldsUsed =
+ fieldsUsed.union(ImmutableBitSet.range(groupCount + indicatorCount));
+
+ // If the input is unchanged, and we need to project all columns,
+ // there's nothing to do.
+ if (input == newInput
+ && fieldsUsed.equals(ImmutableBitSet.range(rowType.getFieldCount()))) {
+ return new TrimResult(
+ aggregate,
+ Mappings.createIdentity(rowType.getFieldCount()));
}
- final ImmutableList<RelCollation> collations =
- RelMetadataQuery.collations(input);
- for (RelCollation collation : collations) {
- for (RelFieldCollation fieldCollation : collation.getFieldCollations()) {
- fieldsUsed = fieldsUsed.set(fieldCollation.getFieldIndex());
+
+ // Which agg calls are used by our consumer?
+ int j = groupCount + indicatorCount;
+ int usedAggCallCount = 0;
+ for (int i = 0; i < aggregate.getAggCallList().size(); i++) {
+ if (fieldsUsed.get(j++)) {
+ ++usedAggCallCount;
}
}
- return dispatchTrimFields(input, fieldsUsed, extraFields);
+
+ // Offset due to the number of system fields having changed.
+ Mapping mapping =
+ Mappings.create(
+ MappingType.INVERSE_SURJECTION,
+ rowType.getFieldCount(),
+ groupCount + indicatorCount + usedAggCallCount);
+
+ final ImmutableBitSet newGroupSet =
+ Mappings.apply(inputMapping, aggregate.getGroupSet());
+
+ final ImmutableList<ImmutableBitSet> newGroupSets =
+ ImmutableList.copyOf(
+ Iterables.transform(aggregate.getGroupSets(),
+ new Function<ImmutableBitSet, ImmutableBitSet>() {
+ @Override
+ public ImmutableBitSet apply(ImmutableBitSet input) {
+ return Mappings.apply(inputMapping, input);
+ }
+ }));
+
+ // Populate mapping of where to find the fields. System, group key and
+ // indicator fields first.
+ for (j = 0; j < groupCount + indicatorCount; j++) {
+ mapping.set(j, j);
+ }
+
+ // Now create new agg calls, and populate mapping for them.
+ final List<AggregateCall> newAggCallList = new ArrayList<>();
+ j = groupCount + indicatorCount;
+ for (AggregateCall aggCall : aggregate.getAggCallList()) {
+ if (fieldsUsed.get(j)) {
+ AggregateCall newAggCall =
+ aggCall.copy(Mappings.apply2(inputMapping, aggCall.getArgList()),
+ Mappings.apply(inputMapping, aggCall.filterArg));
+ if (newAggCall.equals(aggCall)) {
+ newAggCall = aggCall; // immutable -> canonize to save space
+ }
+ mapping.set(j, groupCount + indicatorCount + newAggCallList.size());
+ newAggCallList.add(newAggCall);
+ }
+ ++j;
+ }
+
+ RelNode newAggregate = aggregateFactory.createAggregate(newInput,
+ aggregate.indicator, newGroupSet, newGroupSets, newAggCallList);
+
+ assert newAggregate.getClass() == aggregate.getClass();
+
+ return new TrimResult(newAggregate, mapping);
}
+
}
http://git-wip-us.apache.org/repos/asf/hive/blob/1cce5f00/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java
index 67f17c2..16a375c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java
@@ -265,8 +265,8 @@ public class PlanModifierForASTConv {
// TODO: Verify GB having is not a separate filter (if so we shouldn't
// introduce derived table)
- if (parent instanceof Filter || parent instanceof Join
- || parent instanceof SetOp) {
+ if (parent instanceof Filter || parent instanceof Join || parent instanceof SetOp ||
+ (parent instanceof Aggregate && filterNode.getInputs().get(0) instanceof Aggregate)) {
validParent = false;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/1cce5f00/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index d5c747f..0a7ce3a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -134,6 +134,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateProjectMergeRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveExpandDistinctAggregatesRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterJoinRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterProjectTransposeRule;
@@ -883,6 +884,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
hepPgmBldr.addRuleInstance(ProjectRemoveRule.INSTANCE);
hepPgmBldr.addRuleInstance(UnionMergeRule.INSTANCE);
hepPgmBldr.addRuleInstance(new ProjectMergeRule(false, HiveProject.DEFAULT_PROJECT_FACTORY));
+ hepPgmBldr.addRuleInstance(HiveAggregateProjectMergeRule.INSTANCE);
hepPgm = hepPgmBldr.build();
HepPlanner hepPlanner = new HepPlanner(hepPgm);
http://git-wip-us.apache.org/repos/asf/hive/blob/1cce5f00/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/alter_partition_coltype.q.out b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
index 9fc3c8d..a42b464 100644
--- a/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
+++ b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
@@ -232,8 +232,8 @@ STAGE PLANS:
name: default.alter_coltype
name: default.alter_coltype
Truncated Path -> Alias:
- /alter_coltype/dt=100/ts=3.0 [$hdt$_0:alter_coltype]
- /alter_coltype/dt=100/ts=6.30 [$hdt$_0:alter_coltype]
+ /alter_coltype/dt=100/ts=3.0 [alter_coltype]
+ /alter_coltype/dt=100/ts=6.30 [alter_coltype]
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
@@ -411,7 +411,7 @@ STAGE PLANS:
name: default.alter_coltype
name: default.alter_coltype
Truncated Path -> Alias:
- /alter_coltype/dt=100/ts=6.30 [$hdt$_0:alter_coltype]
+ /alter_coltype/dt=100/ts=6.30 [alter_coltype]
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
@@ -568,7 +568,7 @@ STAGE PLANS:
name: default.alter_coltype
name: default.alter_coltype
Truncated Path -> Alias:
- /alter_coltype/dt=100/ts=3.0 [$hdt$_0:alter_coltype]
+ /alter_coltype/dt=100/ts=3.0 [alter_coltype]
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/1cce5f00/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out b/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
index 82cc0da..1b9ec68 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
@@ -157,11 +157,11 @@ STAGE PLANS:
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: state (type: string), locid (type: int)
- outputColumnNames: _col0, _col1
+ outputColumnNames: state, locid
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: PARTIAL
Group By Operator
aggregations: count()
- keys: _col0 (type: string), _col1 (type: int)
+ keys: state (type: string), locid (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL
@@ -178,22 +178,18 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 7 Data size: 658 Basic stats: COMPLETE Column stats: PARTIAL
- Select Operator
- expressions: _col0 (type: string), _col2 (type: bigint), _col1 (type: int)
+ Group By Operator
+ aggregations: min(_col1)
+ keys: _col0 (type: string), _col2 (type: bigint)
+ mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 7 Data size: 658 Basic stats: COMPLETE Column stats: PARTIAL
- Group By Operator
- aggregations: min(_col2)
- keys: _col0 (type: string), _col1 (type: bigint)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-2
Map Reduce
@@ -255,10 +251,10 @@ STAGE PLANS:
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: year (type: int)
- outputColumnNames: _col0
+ outputColumnNames: year
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- keys: _col0 (type: int)
+ keys: year (type: int)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
@@ -308,10 +304,10 @@ STAGE PLANS:
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: state (type: string), locid (type: int)
- outputColumnNames: _col0, _col1
+ outputColumnNames: state, locid
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- keys: _col0 (type: string), _col1 (type: int)
+ keys: state (type: string), locid (type: int)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
@@ -361,10 +357,10 @@ STAGE PLANS:
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: state (type: string), locid (type: int)
- outputColumnNames: _col0, _col1
+ outputColumnNames: state, locid
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
+ keys: state (type: string), locid (type: int), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
@@ -415,10 +411,10 @@ STAGE PLANS:
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: state (type: string), locid (type: int)
- outputColumnNames: _col0, _col1
+ outputColumnNames: state, locid
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
+ keys: state (type: string), locid (type: int), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
@@ -469,10 +465,10 @@ STAGE PLANS:
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: state (type: string), locid (type: int)
- outputColumnNames: _col0, _col1
+ outputColumnNames: state, locid
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
+ keys: state (type: string), locid (type: int), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
@@ -523,10 +519,10 @@ STAGE PLANS:
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: state (type: string), locid (type: int)
- outputColumnNames: _col0, _col1
+ outputColumnNames: state, locid
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
+ keys: state (type: string), locid (type: int), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE
@@ -577,10 +573,10 @@ STAGE PLANS:
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: state (type: string), locid (type: int)
- outputColumnNames: _col0, _col1
+ outputColumnNames: state, locid
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
+ keys: state (type: string), locid (type: int), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
@@ -631,10 +627,10 @@ STAGE PLANS:
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: state (type: string), locid (type: int)
- outputColumnNames: _col0, _col1
+ outputColumnNames: state, locid
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
+ keys: state (type: string), locid (type: int), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
@@ -689,10 +685,10 @@ STAGE PLANS:
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: year (type: int)
- outputColumnNames: _col0
+ outputColumnNames: year
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- keys: _col0 (type: int)
+ keys: year (type: int)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
@@ -742,10 +738,10 @@ STAGE PLANS:
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: state (type: string), locid (type: int)
- outputColumnNames: _col0, _col1
+ outputColumnNames: state, locid
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
+ keys: state (type: string), locid (type: int), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE
@@ -798,10 +794,10 @@ STAGE PLANS:
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: state (type: string), zip (type: bigint)
- outputColumnNames: _col0, _col1
+ outputColumnNames: state, zip
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: PARTIAL
Group By Operator
- keys: _col0 (type: string), _col1 (type: bigint)
+ keys: state (type: string), zip (type: bigint)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL
@@ -851,10 +847,10 @@ STAGE PLANS:
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: state (type: string), locid (type: int)
- outputColumnNames: _col0, _col1
+ outputColumnNames: state, locid
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
+ keys: state (type: string), locid (type: int), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE
@@ -905,10 +901,10 @@ STAGE PLANS:
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: state (type: string), locid (type: int)
- outputColumnNames: _col0, _col1
+ outputColumnNames: state, locid
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
+ keys: state (type: string), locid (type: int), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE
@@ -959,10 +955,10 @@ STAGE PLANS:
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: state (type: string), locid (type: int)
- outputColumnNames: _col0, _col1
+ outputColumnNames: state, locid
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
+ keys: state (type: string), locid (type: int), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
@@ -1013,10 +1009,10 @@ STAGE PLANS:
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: state (type: string), locid (type: int)
- outputColumnNames: _col0, _col1
+ outputColumnNames: state, locid
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
+ keys: state (type: string), locid (type: int), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
@@ -1067,10 +1063,10 @@ STAGE PLANS:
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: state (type: string), locid (type: int)
- outputColumnNames: _col0, _col1
+ outputColumnNames: state, locid
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
+ keys: state (type: string), locid (type: int), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE
@@ -1121,10 +1117,10 @@ STAGE PLANS:
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: state (type: string), locid (type: int)
- outputColumnNames: _col0, _col1
+ outputColumnNames: state, locid
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
+ keys: state (type: string), locid (type: int), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE
@@ -1175,10 +1171,10 @@ STAGE PLANS:
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: year (type: int)
- outputColumnNames: _col0
+ outputColumnNames: year
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- keys: _col0 (type: int)
+ keys: year (type: int)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
@@ -1228,10 +1224,10 @@ STAGE PLANS:
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: state (type: string), locid (type: int)
- outputColumnNames: _col0, _col1
+ outputColumnNames: state, locid
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- keys: _col0 (type: string), _col1 (type: int), '0' (type: string)
+ keys: state (type: string), locid (type: int), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE
http://git-wip-us.apache.org/repos/asf/hive/blob/1cce5f00/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out b/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out
index 2cb1e84..be3fa1d 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out
@@ -91,10 +91,10 @@ STAGE PLANS:
Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: state (type: string), country (type: string)
- outputColumnNames: _col0, _col1
+ outputColumnNames: state, country
Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- keys: _col0 (type: string), _col1 (type: string)
+ keys: state (type: string), country (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
@@ -144,10 +144,10 @@ STAGE PLANS:
Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: state (type: string), country (type: string)
- outputColumnNames: _col0, _col1
+ outputColumnNames: state, country
Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- keys: _col0 (type: string), _col1 (type: string), '0' (type: string)
+ keys: state (type: string), country (type: string), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 80 Data size: 800 Basic stats: COMPLETE Column stats: NONE
@@ -202,10 +202,10 @@ STAGE PLANS:
Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: state (type: string), country (type: string)
- outputColumnNames: _col0, _col1
+ outputColumnNames: state, country
Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- keys: _col0 (type: string), _col1 (type: string)
+ keys: state (type: string), country (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 8 Data size: 1384 Basic stats: COMPLETE Column stats: COMPLETE
@@ -257,10 +257,10 @@ STAGE PLANS:
Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: state (type: string), votes (type: bigint)
- outputColumnNames: _col0, _col1
+ outputColumnNames: state, votes
Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: PARTIAL
Group By Operator
- keys: _col0 (type: string), _col1 (type: bigint)
+ keys: state (type: string), votes (type: bigint)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL
@@ -310,10 +310,10 @@ STAGE PLANS:
Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: state (type: string), country (type: string)
- outputColumnNames: _col0, _col1
+ outputColumnNames: state, country
Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- keys: _col0 (type: string), _col1 (type: string), '0' (type: string)
+ keys: state (type: string), country (type: string), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 32 Data size: 8256 Basic stats: COMPLETE Column stats: COMPLETE
@@ -364,10 +364,10 @@ STAGE PLANS:
Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: state (type: string), country (type: string)
- outputColumnNames: _col0, _col1
+ outputColumnNames: state, country
Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- keys: _col0 (type: string), _col1 (type: string)
+ keys: state (type: string), country (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 20 Data size: 3460 Basic stats: COMPLETE Column stats: COMPLETE
@@ -417,10 +417,10 @@ STAGE PLANS:
Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: state (type: string), country (type: string)
- outputColumnNames: _col0, _col1
+ outputColumnNames: state, country
Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- keys: _col0 (type: string), _col1 (type: string), '0' (type: string)
+ keys: state (type: string), country (type: string), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 80 Data size: 20640 Basic stats: COMPLETE Column stats: COMPLETE
http://git-wip-us.apache.org/repos/asf/hive/blob/1cce5f00/ql/src/test/results/clientpositive/auto_join18.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_join18.q.out b/ql/src/test/results/clientpositive/auto_join18.q.out
index 6dc7a63..7fd7dd1 100644
--- a/ql/src/test/results/clientpositive/auto_join18.q.out
+++ b/ql/src/test/results/clientpositive/auto_join18.q.out
@@ -40,11 +40,11 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ outputColumnNames: key, value
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(_col1)
- keys: _col0 (type: string)
+ aggregations: count(value)
+ keys: key (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -140,11 +140,11 @@ STAGE PLANS:
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ outputColumnNames: key, value
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(DISTINCT _col1)
- keys: _col0 (type: string), _col1 (type: string)
+ aggregations: count(DISTINCT value)
+ keys: key (type: string), value (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
http://git-wip-us.apache.org/repos/asf/hive/blob/1cce5f00/ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out b/ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out
index cc17ad1..0a9dd76 100644
--- a/ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out
@@ -42,11 +42,11 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ outputColumnNames: key, value
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(_col1)
- keys: _col0 (type: string)
+ aggregations: count(value)
+ keys: key (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -142,11 +142,11 @@ STAGE PLANS:
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ outputColumnNames: key, value
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(DISTINCT _col1), count(DISTINCT _col0)
- keys: _col0 (type: string), _col1 (type: string)
+ aggregations: count(DISTINCT value), count(DISTINCT key)
+ keys: key (type: string), value (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
http://git-wip-us.apache.org/repos/asf/hive/blob/1cce5f00/ql/src/test/results/clientpositive/auto_join27.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_join27.q.out b/ql/src/test/results/clientpositive/auto_join27.q.out
index 16a7f02..9c03c78 100644
--- a/ql/src/test/results/clientpositive/auto_join27.q.out
+++ b/ql/src/test/results/clientpositive/auto_join27.q.out
@@ -39,20 +39,16 @@ STAGE PLANS:
Filter Operator
predicate: (UDFToDouble(key) < 200.0) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
+ Group By Operator
+ keys: key (type: string), value (type: string)
+ mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string), _col1 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: string), KEY._col1 (type: string)
http://git-wip-us.apache.org/repos/asf/hive/blob/1cce5f00/ql/src/test/results/clientpositive/auto_join32.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_join32.q.out b/ql/src/test/results/clientpositive/auto_join32.q.out
index f862870..161ab6b 100644
--- a/ql/src/test/results/clientpositive/auto_join32.q.out
+++ b/ql/src/test/results/clientpositive/auto_join32.q.out
@@ -411,10 +411,10 @@ STAGE PLANS:
outputColumnNames: _col1, _col3
Select Operator
expressions: _col3 (type: string), _col1 (type: string)
- outputColumnNames: _col0, _col1
+ outputColumnNames: _col3, _col1
Group By Operator
aggregations: count(DISTINCT _col1)
- keys: _col0 (type: string), _col1 (type: string)
+ keys: _col3 (type: string), _col1 (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
Reduce Output Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/1cce5f00/ql/src/test/results/clientpositive/binarysortable_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/binarysortable_1.q.out b/ql/src/test/results/clientpositive/binarysortable_1.q.out
index 421fd2e..9ef9221 100644
Binary files a/ql/src/test/results/clientpositive/binarysortable_1.q.out and b/ql/src/test/results/clientpositive/binarysortable_1.q.out differ
http://git-wip-us.apache.org/repos/asf/hive/blob/1cce5f00/ql/src/test/results/clientpositive/correlationoptimizer2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/correlationoptimizer2.q.out b/ql/src/test/results/clientpositive/correlationoptimizer2.q.out
index c1a20c8..96c7660 100644
--- a/ql/src/test/results/clientpositive/correlationoptimizer2.q.out
+++ b/ql/src/test/results/clientpositive/correlationoptimizer2.q.out
@@ -41,22 +41,18 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
+ Group By Operator
+ aggregations: count(value)
+ keys: key (type: string)
+ mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(_col1)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
+ value expressions: _col1 (type: bigint)
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
@@ -144,22 +140,18 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
+ Group By Operator
+ aggregations: count(value)
+ keys: key (type: string)
+ mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(_col1)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
+ value expressions: _col1 (type: bigint)
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
@@ -228,44 +220,36 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
+ Group By Operator
+ aggregations: count(value)
+ keys: key (type: string)
+ mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(_col1)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
+ value expressions: _col1 (type: bigint)
TableScan
alias: y
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
+ Group By Operator
+ aggregations: count(value)
+ keys: key (type: string)
+ mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(_col1)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
+ value expressions: _col1 (type: bigint)
Reduce Operator Tree:
Demux Operator
Statistics: Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE
@@ -411,11 +395,11 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ outputColumnNames: key, value
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(_col1)
- keys: _col0 (type: string)
+ aggregations: count(value)
+ keys: key (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -511,11 +495,11 @@ STAGE PLANS:
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ outputColumnNames: key, value
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(_col1)
- keys: _col0 (type: string)
+ aggregations: count(value)
+ keys: key (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
@@ -592,11 +576,11 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ outputColumnNames: key, value
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(_col1)
- keys: _col0 (type: string)
+ aggregations: count(value)
+ keys: key (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -611,11 +595,11 @@ STAGE PLANS:
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ outputColumnNames: key, value
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(_col1)
- keys: _col0 (type: string)
+ aggregations: count(value)
+ keys: key (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
@@ -770,11 +754,11 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ outputColumnNames: key, value
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(_col1)
- keys: _col0 (type: string)
+ aggregations: count(value)
+ keys: key (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -870,11 +854,11 @@ STAGE PLANS:
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ outputColumnNames: key, value
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(_col1)
- keys: _col0 (type: string)
+ aggregations: count(value)
+ keys: key (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
@@ -951,11 +935,11 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ outputColumnNames: key, value
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(_col1)
- keys: _col0 (type: string)
+ aggregations: count(value)
+ keys: key (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -970,11 +954,11 @@ STAGE PLANS:
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ outputColumnNames: key, value
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(_col1)
- keys: _col0 (type: string)
+ aggregations: count(value)
+ keys: key (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
@@ -1129,11 +1113,11 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ outputColumnNames: key, value
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(_col1)
- keys: _col0 (type: string)
+ aggregations: count(value)
+ keys: key (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -1229,11 +1213,11 @@ STAGE PLANS:
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ outputColumnNames: key, value
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(_col1)
- keys: _col0 (type: string)
+ aggregations: count(value)
+ keys: key (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
@@ -1310,11 +1294,11 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ outputColumnNames: key, value
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(_col1)
- keys: _col0 (type: string)
+ aggregations: count(value)
+ keys: key (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -1329,11 +1313,11 @@ STAGE PLANS:
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ outputColumnNames: key, value
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(_col1)
- keys: _col0 (type: string)
+ aggregations: count(value)
+ keys: key (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
@@ -1489,10 +1473,10 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: _col0
+ outputColumnNames: key
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- keys: _col0 (type: string)
+ keys: key (type: string)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -1614,10 +1598,10 @@ STAGE PLANS:
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: _col0
+ outputColumnNames: key
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- keys: _col0 (type: string)
+ keys: key (type: string)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
@@ -1703,10 +1687,10 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: _col0
+ outputColumnNames: key
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- keys: _col0 (type: string)
+ keys: key (type: string)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -1720,10 +1704,10 @@ STAGE PLANS:
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: _col0
+ outputColumnNames: key
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- keys: _col0 (type: string)
+ keys: key (type: string)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
@@ -2029,22 +2013,18 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
+ Group By Operator
+ aggregations: count(value)
+ keys: key (type: string)
+ mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(_col1)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
+ value expressions: _col1 (type: bigint)
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
@@ -2144,22 +2124,18 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
+ Group By Operator
+ aggregations: count(value)
+ keys: key (type: string)
+ mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(_col1)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
+ value expressions: _col1 (type: bigint)
Reduce Operator Tree:
Demux Operator
Statistics: Num rows: 276 Data size: 2854 Basic stats: COMPLETE Column stats: NONE