You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kg...@apache.org on 2020/02/27 16:25:48 UTC
[hive] branch master updated: HIVE-22893: Enhance data size
estimation for fields computed by UDFs (Zoltan Haindrich reviewed by Jesus
Camacho Rodriguez)
This is an automated email from the ASF dual-hosted git repository.
kgyrtkirk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new ffba5d6 HIVE-22893: Enhance data size estimation for fields computed by UDFs (Zoltan Haindrich reviewed by Jesus Camacho Rodriguez)
ffba5d6 is described below
commit ffba5d6cd97d26c12361e429b09a958b41da421a
Author: Zoltan Haindrich <ki...@rxd.hu>
AuthorDate: Thu Feb 27 16:20:05 2020 +0000
HIVE-22893: Enhance data size estimation for fields computed by UDFs (Zoltan Haindrich reviewed by Jesus Camacho Rodriguez)
Close apache/hive#915
Signed-off-by: Zoltan Haindrich <ki...@rxd.hu>
---
.../java/org/apache/hadoop/hive/conf/HiveConf.java | 3 +
.../clientpositive/udaf_example_group_concat.q.out | 8 +-
.../apache/hadoop/hive/ql/stats/StatsUtils.java | 92 +++++++++++--
.../stats/estimator/PessimisticStatCombiner.java | 49 +++++++
.../hive/ql/stats/estimator/StatEstimator.java | 43 ++++++
.../ql/stats/estimator/StatEstimatorProvider.java | 29 ++++
.../org/apache/hadoop/hive/ql/udf/UDFSubstr.java | 52 ++++++-
.../hadoop/hive/ql/udf/generic/GenericUDF.java | 9 ++
.../hive/ql/udf/generic/GenericUDFBridge.java | 14 +-
.../hadoop/hive/ql/udf/generic/GenericUDFCase.java | 30 ++++-
.../hive/ql/udf/generic/GenericUDFCoalesce.java | 25 +++-
.../hadoop/hive/ql/udf/generic/GenericUDFIf.java | 27 +++-
ql/src/test/queries/clientpositive/udf_coalesce.q | 7 +-
.../cbo_rp_gby2_map_multi_distinct.q.out | 28 ++--
.../cbo_rp_groupby3_noskew_multi_distinct.q.out | 4 +-
.../clientpositive/constprog_when_case.q.out | 4 +-
.../clientpositive/count_dist_rewrite.q.out | 6 +-
ql/src/test/results/clientpositive/groupby11.q.out | 14 +-
.../test/results/clientpositive/groupby2_map.q.out | 12 +-
.../groupby2_map_multi_distinct.q.out | 24 ++--
.../results/clientpositive/groupby2_map_skew.q.out | 16 +--
.../results/clientpositive/groupby2_noskew.q.out | 10 +-
.../groupby2_noskew_multi_distinct.q.out | 10 +-
.../test/results/clientpositive/groupby3_map.q.out | 4 +-
.../groupby3_map_multi_distinct.q.out | 4 +-
.../results/clientpositive/groupby3_map_skew.q.out | 4 +-
ql/src/test/results/clientpositive/groupby4.q.out | 12 +-
.../results/clientpositive/groupby4_noskew.q.out | 8 +-
ql/src/test/results/clientpositive/groupby6.q.out | 12 +-
.../test/results/clientpositive/groupby6_map.q.out | 10 +-
.../results/clientpositive/groupby6_map_skew.q.out | 14 +-
.../results/clientpositive/groupby6_noskew.q.out | 8 +-
.../results/clientpositive/groupby8_map_skew.q.out | 8 +-
ql/src/test/results/clientpositive/groupby9.q.out | 32 ++---
.../results/clientpositive/groupby_map_ppr.q.out | 12 +-
.../groupby_map_ppr_multi_distinct.q.out | 12 +-
.../groupby_multi_single_reducer.q.out | 66 ++++-----
.../groupby_multi_single_reducer2.q.out | 20 +--
.../clientpositive/groupby_nocolumnalign.q.out | 4 +-
.../results/clientpositive/groupby_position.q.out | 32 ++---
.../test/results/clientpositive/groupby_ppr.q.out | 10 +-
.../groupby_ppr_multi_distinct.q.out | 10 +-
.../infer_bucket_sort_dyn_part.q.out | 149 +++++++++------------
.../results/clientpositive/list_bucket_dml_6.q.out | 32 ++---
.../results/clientpositive/list_bucket_dml_7.q.out | 32 ++---
.../results/clientpositive/list_bucket_dml_8.q.out | 16 +--
.../clientpositive/llap/count_dist_rewrite.q.out | 6 +-
.../clientpositive/llap/explainuser_2.q.out | 24 ++--
.../results/clientpositive/llap/groupby2.q.out | 14 +-
.../llap/tez_union_multiinsert.q.out | 94 ++++++-------
.../results/clientpositive/llap/udf_coalesce.q.out | 115 +++++++++++++---
.../clientpositive/llap/unionDistinct_1.q.out | 6 +-
.../clientpositive/llap/vector_case_when_1.q.out | 12 +-
.../clientpositive/llap/vector_case_when_2.q.out | 24 ++--
.../clientpositive/llap/vector_groupby4.q.out | 12 +-
.../clientpositive/llap/vector_groupby6.q.out | 12 +-
.../clientpositive/llap/vector_if_expr.q.out | 8 +-
.../results/clientpositive/llap/vector_nvl.q.out | 6 +-
.../results/clientpositive/llap/vector_udf1.q.out | 6 +-
.../llap/vectorized_string_funcs.q.out | 4 +-
.../llap/vectorized_timestamp_funcs.q.out | 8 +-
.../clientpositive/merge_dynamic_partition4.q.out | 78 ++++-------
.../clientpositive/merge_dynamic_partition5.q.out | 78 ++++-------
.../clientpositive/nullgroup4_multi_distinct.q.out | 4 +-
.../offset_limit_global_optimizer.q.out | 80 +++++------
.../perf/tez/constraints/query19.q.out | 14 +-
.../perf/tez/constraints/query79.q.out | 12 +-
.../perf/tez/constraints/query8.q.out | 20 +--
.../perf/tez/constraints/query85.q.out | 6 +-
.../perf/tez/constraints/query99.q.out | 18 +--
.../results/clientpositive/perf/tez/query19.q.out | 14 +-
.../results/clientpositive/perf/tez/query23.q.out | 14 +-
.../results/clientpositive/perf/tez/query79.q.out | 12 +-
.../results/clientpositive/perf/tez/query8.q.out | 20 +--
.../results/clientpositive/perf/tez/query85.q.out | 6 +-
.../results/clientpositive/perf/tez/query99.q.out | 18 +--
.../results/clientpositive/spark/union17.q.out | 16 +--
ql/src/test/results/clientpositive/union17.q.out | 14 +-
.../clientpositive/vector_case_when_1.q.out | 12 +-
.../clientpositive/vector_case_when_2.q.out | 24 ++--
.../results/clientpositive/vector_groupby4.q.out | 12 +-
.../results/clientpositive/vector_groupby6.q.out | 12 +-
.../results/clientpositive/vector_if_expr.q.out | 8 +-
.../test/results/clientpositive/vector_nvl.q.out | 6 +-
.../clientpositive/vectorization_multi_value.q.out | 32 ++---
.../clientpositive/vectorized_string_funcs.q.out | 4 +-
.../vectorized_timestamp_funcs.q.out | 8 +-
.../hadoop/hive/metastore/MetaStoreSchemaInfo.java | 9 +-
88 files changed, 1142 insertions(+), 787 deletions(-)
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index bfc2695..1a4d71b 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2519,6 +2519,9 @@ public class HiveConf extends Configuration {
"higher compute cost. (NDV means the number of distinct values.). It only affects the FM-Sketch \n" +
"(not the HLL algorithm which is the default), where it computes the number of necessary\n" +
" bitvectors to achieve the accuracy."),
+ HIVE_STATS_ESTIMATORS_ENABLE("hive.stats.estimators.enable", true,
+ "Estimators are able to provide more accurate column statistic infos for UDF results."),
+
/**
* @deprecated Use MetastoreConf.STATS_NDV_TUNER
*/
diff --git a/contrib/src/test/results/clientpositive/udaf_example_group_concat.q.out b/contrib/src/test/results/clientpositive/udaf_example_group_concat.q.out
index 15dd4c0..509b016 100644
--- a/contrib/src/test/results/clientpositive/udaf_example_group_concat.q.out
+++ b/contrib/src/test/results/clientpositive/udaf_example_group_concat.q.out
@@ -39,13 +39,13 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 526000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 501250 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 526000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 501250 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: array<string>)
Reduce Operator Tree:
Group By Operator
@@ -53,10 +53,10 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 67250 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 250 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 67250 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index cb2d0a7..bd4a4f6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -27,6 +27,7 @@ import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
+import java.util.Optional;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
@@ -73,6 +74,9 @@ import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.Statistics;
import org.apache.hadoop.hive.ql.plan.Statistics.State;
import org.apache.hadoop.hive.ql.stats.BasicStats.Factory;
+import org.apache.hadoop.hive.ql.stats.estimator.StatEstimator;
+import org.apache.hadoop.hive.ql.stats.estimator.StatEstimatorProvider;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFSum;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
import org.apache.hadoop.hive.ql.udf.generic.NDV;
@@ -81,6 +85,7 @@ import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantMapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantStructObjectInspector;
@@ -1528,18 +1533,7 @@ public class StatsUtils {
return null;
}
} else if (end instanceof ExprNodeConstantDesc) {
-
- // constant projection
- ExprNodeConstantDesc encd = (ExprNodeConstantDesc) end;
-
- colName = encd.getName();
- colType = encd.getTypeString();
- if (encd.getValue() == null) {
- // null projection
- numNulls = numRows;
- } else {
- countDistincts = 1;
- }
+ return buildColStatForConstant(conf, numRows, (ExprNodeConstantDesc) end);
} else if (end instanceof ExprNodeGenericFuncDesc) {
ExprNodeGenericFuncDesc engfd = (ExprNodeGenericFuncDesc) end;
colName = engfd.getName();
@@ -1560,6 +1554,30 @@ public class StatsUtils {
}
}
+ if (conf.getBoolVar(ConfVars.HIVE_STATS_ESTIMATORS_ENABLE)) {
+ Optional<StatEstimatorProvider> sep = engfd.getGenericUDF().adapt(StatEstimatorProvider.class);
+ if (sep.isPresent()) {
+ StatEstimator se = sep.get().getStatEstimator();
+ List<ColStatistics> csList = new ArrayList<ColStatistics>();
+ for (ExprNodeDesc child : engfd.getChildren()) {
+ ColStatistics cs = getColStatisticsFromExpression(conf, parentStats, child);
+ if (cs == null) {
+ break;
+ }
+ csList.add(cs);
+ }
+ if (csList.size() == engfd.getChildren().size()) {
+ Optional<ColStatistics> res = se.estimate(csList);
+ if (res.isPresent()) {
+ ColStatistics newStats = res.get();
+ colType = colType.toLowerCase();
+ newStats.setColumnType(colType);
+ newStats.setColumnName(colName);
+ return newStats;
+ }
+ }
+ }
+ }
// fallback to default
countDistincts = getNDVFor(engfd, numRows, parentStats);
} else if (end instanceof ExprNodeColumnListDesc) {
@@ -1590,6 +1608,56 @@ public class StatsUtils {
return colStats;
}
+ private static ColStatistics buildColStatForConstant(HiveConf conf, long numRows, ExprNodeConstantDesc encd) {
+
+ long numNulls = 0;
+ long countDistincts = 0;
+ if (encd.getValue() == null) {
+ // null projection
+ numNulls = numRows;
+ } else {
+ countDistincts = 1;
+ }
+ String colType = encd.getTypeString();
+ colType = colType.toLowerCase();
+ ObjectInspector oi = encd.getWritableObjectInspector();
+ double avgColSize = getAvgColLenOf(conf, oi, colType);
+ ColStatistics colStats = new ColStatistics(encd.getName(), colType);
+ colStats.setAvgColLen(avgColSize);
+ colStats.setCountDistint(countDistincts);
+ colStats.setNumNulls(numNulls);
+
+ Optional<Number> value = getConstValue(encd);
+ if (value.isPresent()) {
+ colStats.setRange(value.get(), value.get());
+ }
+ return colStats;
+ }
+
+ private static Optional<Number> getConstValue(ExprNodeConstantDesc encd) {
+ if (encd.getValue() != null) {
+ String constant = encd.getValue().toString();
+ PrimitiveCategory category = GenericUDAFSum.getReturnType(encd.getTypeInfo());
+ try {
+ switch (category) {
+ case INT:
+ case BYTE:
+ case SHORT:
+ case LONG:
+ return Optional.of(Long.parseLong(constant));
+ case FLOAT:
+ case DOUBLE:
+ case DECIMAL:
+ return Optional.of(Double.parseDouble(constant));
+ default:
+ }
+ } catch (Exception e) {
+ LOG.debug("Interpreting constant (" + constant + ") resulted in exception", e);
+ }
+ }
+ return Optional.empty();
+ }
+
private static boolean isWideningCast(ExprNodeGenericFuncDesc engfd) {
GenericUDF udf = engfd.getGenericUDF();
if (!FunctionRegistry.isOpCast(udf)) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/PessimisticStatCombiner.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/PessimisticStatCombiner.java
new file mode 100644
index 0000000..131b422
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/PessimisticStatCombiner.java
@@ -0,0 +1,49 @@
+package org.apache.hadoop.hive.ql.stats.estimator;
+
+import java.util.Optional;
+
+import org.apache.hadoop.hive.ql.plan.ColStatistics;
+
+/**
+ * Combines {@link ColStatistics} objects to provide the most pessimistic estimate.
+ */
+public class PessimisticStatCombiner {
+
+ private boolean inited;
+ private ColStatistics result;
+
+ public void add(ColStatistics stat) {
+ if (!inited) {
+ inited = true;
+ result = stat.clone();
+ result.setRange(null);
+ result.setIsEstimated(true);
+ return;
+ } else {
+ if (stat.getAvgColLen() > result.getAvgColLen()) {
+ result.setAvgColLen(stat.getAvgColLen());
+ }
+ if (stat.getCountDistint() > result.getCountDistint()) {
+ result.setCountDistint(stat.getCountDistint());
+ }
+ if (stat.getNumNulls() > result.getNumNulls()) {
+ result.setNumNulls(stat.getNumNulls());
+ }
+ if (stat.getNumTrues() > result.getNumTrues()) {
+ result.setNumTrues(stat.getNumTrues());
+ }
+ if (stat.getNumFalses() > result.getNumFalses()) {
+ result.setNumFalses(stat.getNumFalses());
+ }
+ if (stat.isFilteredColumn()) {
+ result.setFilterColumn();
+ }
+
+ }
+
+ }
+ public Optional<ColStatistics> getResult() {
+ return Optional.of(result);
+
+ }
+}
\ No newline at end of file
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/StatEstimator.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/StatEstimator.java
new file mode 100644
index 0000000..d1fc3f2
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/StatEstimator.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.stats.estimator;
+
+import java.util.List;
+import java.util.Optional;
+
+import org.apache.hadoop.hive.ql.plan.ColStatistics;
+
+/**
+ * Enables statistics related computation on UDFs
+ */
+public interface StatEstimator {
+
+ /**
+ * Computes the output statistics of the actual UDF.
+ *
+ * The estimator should return with a prefereably overestimated {@link ColStatistics} object if possible.
+ * The actual estimation logic may decide to not give an estimation; it should return with {@link Optional#empty()}.
+ *
+ * Note: at the time of the call there will be {@link ColStatistics} for all the arguments; if that is not available - the estimation is skipped.
+ *
+ * @param argStats the statistics for every argument of the UDF
+ * @return {@link ColStatistics} estimate for the actual UDF.
+ */
+ public Optional<ColStatistics> estimate(List<ColStatistics> argStats);
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/StatEstimatorProvider.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/StatEstimatorProvider.java
new file mode 100644
index 0000000..96865d1
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/StatEstimatorProvider.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.stats.estimator;
+
+/**
+ * Marker interface for UDFs to communicate that the usage of StatEstimators is supported by the UDF.
+ */
+public interface StatEstimatorProvider {
+
+ /**
+ * Returns the {@link StatEstimator} for the given UDF instance.
+ */
+ public StatEstimator getStatEstimator();
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java
index 5b1964c..cb28cf6 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java
@@ -19,12 +19,18 @@
package org.apache.hadoop.hive.ql.udf;
import java.util.Arrays;
+import java.util.List;
+import java.util.Optional;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
import org.apache.hadoop.hive.ql.exec.vector.expressions.StringSubstrColStart;
import org.apache.hadoop.hive.ql.exec.vector.expressions.StringSubstrColStartLen;
+import org.apache.hadoop.hive.ql.plan.ColStatistics;
+import org.apache.hadoop.hive.ql.plan.ColStatistics.Range;
+import org.apache.hadoop.hive.ql.stats.estimator.StatEstimator;
+import org.apache.hadoop.hive.ql.stats.estimator.StatEstimatorProvider;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
@@ -48,7 +54,7 @@ import org.apache.hadoop.io.Text;
+ " > SELECT _FUNC_('Facebook', 5, 1) FROM src LIMIT 1;\n"
+ " 'b'")
@VectorizedExpressions({StringSubstrColStart.class, StringSubstrColStartLen.class})
-public class UDFSubstr extends UDF {
+public class UDFSubstr extends UDF implements StatEstimatorProvider {
private final int[] index;
private final Text r;
@@ -131,4 +137,48 @@ public class UDFSubstr extends UDF {
public BytesWritable evaluate(BytesWritable bw, IntWritable pos){
return evaluate(bw, pos, maxValue);
}
+
+ @Override
+ public StatEstimator getStatEstimator() {
+ return new SubStrStatEstimator();
+ }
+
+ private static class SubStrStatEstimator implements StatEstimator {
+
+ @Override
+ public Optional<ColStatistics> estimate(List<ColStatistics> csList) {
+ ColStatistics cs = csList.get(0).clone();
+ // this might bad in a skewed case; consider:
+ // 1 row with 1000 long string
+ // 99 rows with 0 length
+ // orig avg is 10
+ // new avg is 5 (if substr(5)) ; but in reality it will stay ~10
+ Optional<Double> start = getRangeWidth(csList.get(1).getRange());
+ Range startRange = csList.get(1).getRange();
+ if (startRange != null && startRange.minValue != null) {
+ double newAvgColLen = cs.getAvgColLen() - startRange.minValue.doubleValue();
+ if (newAvgColLen > 0) {
+ cs.setAvgColLen(newAvgColLen);
+ }
+ }
+ if (csList.size() > 2) {
+ Range lengthRange = csList.get(2).getRange();
+ if (lengthRange != null && lengthRange.maxValue != null) {
+ Double w = lengthRange.maxValue.doubleValue();
+ if (cs.getAvgColLen() > w) {
+ cs.setAvgColLen(w);
+ }
+ }
+ }
+ return Optional.of(cs);
+ }
+
+ private Optional<Double> getRangeWidth(Range range) {
+ if (range.minValue != null && range.maxValue != null) {
+ return Optional.of(range.maxValue.doubleValue() - range.minValue.doubleValue());
+ }
+ return Optional.empty();
+ }
+
+ }
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java
index 6597f4b..c1bf325 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.udf.generic;
import java.io.Closeable;
import java.io.IOException;
+import java.util.Optional;
import org.apache.hadoop.hive.common.classification.InterfaceAudience;
import org.apache.hadoop.hive.common.classification.InterfaceStability;
@@ -638,4 +639,12 @@ public abstract class GenericUDF implements Closeable {
return i + ORDINAL_SUFFIXES[i % 10];
}
}
+
+ @SuppressWarnings("unchecked")
+ public <T> Optional<T> adapt(Class<T> clazz) {
+ if (clazz.isInstance(this)) {
+ return Optional.of((T) this);
+ }
+ return Optional.empty();
+ }
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBridge.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBridge.java
index 7a644fc..0a275ae 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBridge.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBridge.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.udf.generic;
import java.io.Serializable;
import java.lang.reflect.Method;
import java.util.ArrayList;
+import java.util.Optional;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
@@ -97,7 +98,7 @@ public class GenericUDFBridge extends GenericUDF implements Serializable {
this.isOperator = isOperator;
this.udfClassName = udfClassName;
}
-
+
// For Java serialization only
public GenericUDFBridge() {
}
@@ -151,7 +152,7 @@ public class GenericUDFBridge extends GenericUDF implements Serializable {
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
try {
- udf = (UDF)getUdfClassInternal().newInstance();
+ udf = getUdfClassInternal().newInstance();
} catch (Exception e) {
throw new UDFArgumentException(
"Unable to instantiate UDF implementation class " + udfClassName + ": " + e);
@@ -249,4 +250,13 @@ public class GenericUDFBridge extends GenericUDF implements Serializable {
public interface UdfWhitelistChecker {
boolean isUdfAllowed(Class<?> clazz);
}
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public <T> Optional<T> adapt(Class<T> clazz) {
+ if (clazz.isInstance(udf)) {
+ return Optional.of((T) udf);
+ }
+ return super.adapt(clazz);
+ }
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCase.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCase.java
index 06e9d00..2857835 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCase.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCase.java
@@ -18,16 +18,23 @@
package org.apache.hadoop.hive.ql.udf.generic;
+import java.util.List;
+import java.util.Optional;
+
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ColStatistics;
+import org.apache.hadoop.hive.ql.stats.estimator.StatEstimator;
+import org.apache.hadoop.hive.ql.stats.estimator.StatEstimatorProvider;
+import org.apache.hadoop.hive.ql.stats.estimator.PessimisticStatCombiner;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
/**
* GenericUDF Class for SQL construct "CASE a WHEN b THEN c [ELSE f] END".
- *
+ *
* NOTES: 1. a and b should be compatible, or an exception will be
* thrown. 2. c and f should be compatible types, or an exception will be
* thrown.
@@ -49,7 +56,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectIn
+ " END\n"
+ " FROM emp_details")
-public class GenericUDFCase extends GenericUDF {
+public class GenericUDFCase extends GenericUDF implements StatEstimatorProvider {
private transient ObjectInspector[] argumentOIs;
private transient GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver;
private transient GenericUDFUtils.ReturnObjectInspectorResolver caseOIResolver;
@@ -138,4 +145,23 @@ public class GenericUDFCase extends GenericUDF {
return sb.toString();
}
+ @Override
+ public StatEstimator getStatEstimator() {
+ return new CaseStatEstimator();
+ }
+
+ static class CaseStatEstimator implements StatEstimator {
+
+ @Override
+ public Optional<ColStatistics> estimate(List<ColStatistics> argStats) {
+ PessimisticStatCombiner combiner = new PessimisticStatCombiner();
+ for (int i = 1; i < argStats.size(); i += 2) {
+ combiner.add(argStats.get(i));
+ }
+ combiner.add(argStats.get(argStats.size() - 1));
+ return combiner.getResult();
+ }
+ }
+
+
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java
index 8ebe9e0..a598905 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java
@@ -18,9 +18,16 @@
package org.apache.hadoop.hive.ql.udf.generic;
+import java.util.List;
+import java.util.Optional;
+
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ColStatistics;
+import org.apache.hadoop.hive.ql.stats.estimator.StatEstimator;
+import org.apache.hadoop.hive.ql.stats.estimator.StatEstimatorProvider;
+import org.apache.hadoop.hive.ql.stats.estimator.PessimisticStatCombiner;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
/**
@@ -33,7 +40,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
value = "_FUNC_(a1, a2, ...) - Returns the first non-null argument",
extended = "Example:\n"
+ " > SELECT _FUNC_(NULL, 1, NULL) FROM src LIMIT 1;\n" + " 1")
-public class GenericUDFCoalesce extends GenericUDF {
+public class GenericUDFCoalesce extends GenericUDF implements StatEstimatorProvider {
private transient ObjectInspector[] argumentOIs;
private transient GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver;
@@ -72,4 +79,20 @@ public class GenericUDFCoalesce extends GenericUDF {
return getStandardDisplayString("COALESCE", children, ",");
}
+ @Override
+ public StatEstimator getStatEstimator() {
+ return new CoalesceStatEstimator();
+ }
+
+ static class CoalesceStatEstimator implements StatEstimator {
+
+ @Override
+ public Optional<ColStatistics> estimate(List<ColStatistics> argStats) {
+ PessimisticStatCombiner combiner = new PessimisticStatCombiner();
+ for (int i = 0; i < argStats.size(); i++) {
+ combiner.add(argStats.get(i));
+ }
+ return combiner.getResult();
+ }
+ }
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java
index 23708dc..eaa3523 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java
@@ -18,6 +18,9 @@
package org.apache.hadoop.hive.ql.udf.generic;
+import java.util.List;
+import java.util.Optional;
+
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
@@ -25,6 +28,10 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressionsSupportDecimal64;
import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ColStatistics;
+import org.apache.hadoop.hive.ql.stats.estimator.StatEstimator;
+import org.apache.hadoop.hive.ql.stats.estimator.StatEstimatorProvider;
+import org.apache.hadoop.hive.ql.stats.estimator.PessimisticStatCombiner;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
@@ -115,7 +122,7 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprVarCharScalarStri
IfExprTimestampScalarColumn.class, IfExprTimestampScalarScalar.class,
})
@VectorizedExpressionsSupportDecimal64()
-public class GenericUDFIf extends GenericUDF {
+public class GenericUDFIf extends GenericUDF implements StatEstimatorProvider {
private transient ObjectInspector[] argumentOIs;
private transient GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver;
@@ -171,4 +178,22 @@ public class GenericUDFIf extends GenericUDF {
assert (children.length == 3);
return getStandardDisplayString("if", children);
}
+
+ @Override
+ public StatEstimator getStatEstimator() {
+ return new IfStatEstimator();
+ }
+
+ static class IfStatEstimator implements StatEstimator {
+
+ @Override
+ public Optional<ColStatistics> estimate(List<ColStatistics> argStats) {
+ PessimisticStatCombiner combiner = new PessimisticStatCombiner();
+ combiner.add(argStats.get(1));
+ combiner.add(argStats.get(2));
+ return combiner.getResult();
+ }
+
+ }
+
}
diff --git a/ql/src/test/queries/clientpositive/udf_coalesce.q b/ql/src/test/queries/clientpositive/udf_coalesce.q
index 7d87580..6c6594a 100644
--- a/ql/src/test/queries/clientpositive/udf_coalesce.q
+++ b/ql/src/test/queries/clientpositive/udf_coalesce.q
@@ -1,6 +1,7 @@
--! qt:dataset:src_thrift
--! qt:dataset:src
-set hive.fetch.task.conversion=more;
+set hive.cbo.enable=false;
+set hive.fetch.task.conversion=none;
DESCRIBE FUNCTION coalesce;
DESCRIBE FUNCTION EXTENDED coalesce;
@@ -47,6 +48,10 @@ SELECT COALESCE(1),
FROM src tablesample (1 rows);
EXPLAIN
+SELECT COALESCE(key,'x') from src limit 1;
+
+
+EXPLAIN
SELECT COALESCE(src_thrift.lint[1], 999),
COALESCE(src_thrift.lintstring[0].mystring, '999'),
COALESCE(src_thrift.mstringstring['key_2'], '999')
diff --git a/ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out b/ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out
index 8579c83..c813456 100644
--- a/ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out
@@ -38,20 +38,20 @@ STAGE PLANS:
Select Operator
expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string), value (type: string)
outputColumnNames: $f0, $f1, $f2
- Statistics: Num rows: 500 Data size: 229500 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 131000 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(DISTINCT $f1), sum($f1), sum(DISTINCT $f1), count($f2)
keys: $f0 (type: string), $f1 (type: string)
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 250 Data size: 100000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 50750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 100000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 50750 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col3 (type: double), _col5 (type: bigint)
Reduce Operator Tree:
Group By Operator
@@ -59,14 +59,14 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: $f0, $f1, $f2, $f3, $f4
- Statistics: Num rows: 250 Data size: 54000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 29250 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: $f0 (type: string), UDFToInteger($f1) (type: int), concat($f0, $f2) (type: string), UDFToInteger($f3) (type: int), UDFToInteger($f4) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 250 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 250 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -75,7 +75,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int)
outputColumnNames: key, c1, c2, c3, c4
- Statistics: Num rows: 250 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll')
minReductionHashAggr: 0.99
@@ -204,20 +204,20 @@ STAGE PLANS:
Select Operator
expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string), value (type: string)
outputColumnNames: $f0, $f1, $f2
- Statistics: Num rows: 500 Data size: 229500 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 131000 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(DISTINCT $f0), sum($f1), sum(DISTINCT $f1), count($f2)
keys: $f0 (type: string), $f1 (type: string)
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 250 Data size: 100000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 50750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 100000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 50750 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col3 (type: double), _col5 (type: bigint)
Reduce Operator Tree:
Group By Operator
@@ -225,14 +225,14 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: $f0, $f1, $f2, $f3, $f4
- Statistics: Num rows: 250 Data size: 54000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 29250 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: $f0 (type: string), UDFToInteger($f1) (type: int), concat($f0, $f2) (type: string), UDFToInteger($f3) (type: int), UDFToInteger($f4) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 250 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 250 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -241,7 +241,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int)
outputColumnNames: key, c1, c2, c3, c4
- Statistics: Num rows: 250 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll')
minReductionHashAggr: 0.99
diff --git a/ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out b/ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out
index 9ae6330..c1e3013 100644
--- a/ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out
@@ -55,12 +55,12 @@ STAGE PLANS:
Select Operator
expressions: substr(value, 5) (type: string), UDFToDouble(substr(value, 5)) (type: double), (UDFToDouble(substr(value, 5)) * UDFToDouble(substr(value, 5))) (type: double)
outputColumnNames: $f0, $f00, $f2
- Statistics: Num rows: 500 Data size: 100000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: $f0 (type: string)
null sort order: z
sort order: +
- Statistics: Num rows: 500 Data size: 100000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: $f2 (type: double), $f00 (type: double)
Execution mode: vectorized
Reduce Operator Tree:
diff --git a/ql/src/test/results/clientpositive/constprog_when_case.q.out b/ql/src/test/results/clientpositive/constprog_when_case.q.out
index f75fa1c..8d3dd98 100644
--- a/ql/src/test/results/clientpositive/constprog_when_case.q.out
+++ b/ql/src/test/results/clientpositive/constprog_when_case.q.out
@@ -52,10 +52,10 @@ STAGE PLANS:
Select Operator
expressions: if((bool0 is true or (null and bool0 is not true and bool0 is not false)), key0, if((((not bool0) is true and bool0 is not true) or (null and bool0 is not true and bool0 is not false)), key1, key2)) (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/count_dist_rewrite.q.out b/ql/src/test/results/clientpositive/count_dist_rewrite.q.out
index f8dbec1..ee1cd28 100644
--- a/ql/src/test/results/clientpositive/count_dist_rewrite.q.out
+++ b/ql/src/test/results/clientpositive/count_dist_rewrite.q.out
@@ -762,13 +762,13 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6, _col7
- Statistics: Num rows: 250 Data size: 146000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 121500 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 146000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 121500 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: double), _col2 (type: bigint), _col4 (type: string), _col5 (type: string), _col6 (type: double), _col7 (type: double)
Reduce Operator Tree:
Group By Operator
@@ -776,7 +776,7 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: partial2
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 250 Data size: 146000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 121500 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: sum(_col1), count(_col2), count(_col0), max(_col3), min(_col4), sum(_col5), sum(_col6)
mode: partial2
diff --git a/ql/src/test/results/clientpositive/groupby11.q.out b/ql/src/test/results/clientpositive/groupby11.q.out
index a784779..4b62238 100644
--- a/ql/src/test/results/clientpositive/groupby11.q.out
+++ b/ql/src/test/results/clientpositive/groupby11.q.out
@@ -234,7 +234,7 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: partial1
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 500 Data size: 100000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
table:
@@ -251,7 +251,7 @@ STAGE PLANS:
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 100000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint), _col2 (type: bigint)
Execution mode: vectorized
Reduce Operator Tree:
@@ -260,14 +260,14 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: final
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 307 Data size: 61400 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 31314 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 307 Data size: 58944 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 28858 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 307 Data size: 58944 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 28858 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -276,7 +276,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int)
outputColumnNames: key, val1, val2
- Statistics: Num rows: 307 Data size: 85653 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 55567 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
table:
@@ -305,7 +305,7 @@ STAGE PLANS:
null sort order: z
sort order: +
Map-reduce partition columns: rand() (type: double)
- Statistics: Num rows: 307 Data size: 85653 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 55567 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: key (type: string), val1 (type: int), val2 (type: int)
Execution mode: vectorized
Reduce Operator Tree:
diff --git a/ql/src/test/results/clientpositive/groupby2_map.q.out b/ql/src/test/results/clientpositive/groupby2_map.q.out
index 1569f04..85bebf0 100644
--- a/ql/src/test/results/clientpositive/groupby2_map.q.out
+++ b/ql/src/test/results/clientpositive/groupby2_map.q.out
@@ -41,13 +41,13 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 250 Data size: 96000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 46750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 96000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 46750 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col3 (type: double)
Reduce Operator Tree:
Group By Operator
@@ -55,14 +55,14 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 50000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 25250 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 68250 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 250 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 68250 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -71,7 +71,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
outputColumnNames: key, c1, c2
- Statistics: Num rows: 250 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 68250 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll')
minReductionHashAggr: 0.99
diff --git a/ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out
index 8d1b345..d49e7de 100644
--- a/ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out
@@ -41,13 +41,13 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 250 Data size: 100000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 50750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 100000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 50750 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col3 (type: double), _col5 (type: bigint)
Reduce Operator Tree:
Group By Operator
@@ -55,14 +55,14 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 250 Data size: 54000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 29250 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 250 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 250 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -71,7 +71,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int)
outputColumnNames: key, c1, c2, c3, c4
- Statistics: Num rows: 250 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll')
minReductionHashAggr: 0.99
@@ -195,13 +195,13 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 250 Data size: 100000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 50750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 100000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 50750 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col3 (type: double), _col5 (type: bigint)
Reduce Operator Tree:
Group By Operator
@@ -209,14 +209,14 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 250 Data size: 54000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 29250 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 250 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 250 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -225,7 +225,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int)
outputColumnNames: key, c1, c2, c3, c4
- Statistics: Num rows: 250 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll')
minReductionHashAggr: 0.99
diff --git a/ql/src/test/results/clientpositive/groupby2_map_skew.q.out b/ql/src/test/results/clientpositive/groupby2_map_skew.q.out
index 762e286..9783f9e 100644
--- a/ql/src/test/results/clientpositive/groupby2_map_skew.q.out
+++ b/ql/src/test/results/clientpositive/groupby2_map_skew.q.out
@@ -42,13 +42,13 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 250 Data size: 96000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 46750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 96000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 46750 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col3 (type: double)
Reduce Operator Tree:
Group By Operator
@@ -56,7 +56,7 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: partials
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 50000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 25250 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
table:
@@ -73,7 +73,7 @@ STAGE PLANS:
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 50000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 25250 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint), _col2 (type: double)
Execution mode: vectorized
Reduce Operator Tree:
@@ -82,14 +82,14 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: final
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 50000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 25250 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 68250 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 250 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 68250 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -98,7 +98,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
outputColumnNames: key, c1, c2
- Statistics: Num rows: 250 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 68250 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll')
minReductionHashAggr: 0.99
diff --git a/ql/src/test/results/clientpositive/groupby2_noskew.q.out b/ql/src/test/results/clientpositive/groupby2_noskew.q.out
index 6edcdc6..5dd501f 100644
--- a/ql/src/test/results/clientpositive/groupby2_noskew.q.out
+++ b/ql/src/test/results/clientpositive/groupby2_noskew.q.out
@@ -48,14 +48,14 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: complete
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 316 Data size: 63200 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 31916 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 316 Data size: 117552 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 316 Data size: 117552 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -64,7 +64,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
outputColumnNames: key, c1, c2
- Statistics: Num rows: 316 Data size: 117552 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
table:
@@ -97,7 +97,7 @@ STAGE PLANS:
Reduce Output Operator
null sort order:
sort order:
- Statistics: Num rows: 316 Data size: 117552 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: key (type: string), c1 (type: int), c2 (type: string)
Execution mode: vectorized
Reduce Operator Tree:
diff --git a/ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out
index ded98a4..4d8a5aa 100644
--- a/ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out
@@ -49,14 +49,14 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: complete
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 316 Data size: 68256 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 36972 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 316 Data size: 120080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 316 Data size: 120080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -65,7 +65,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int)
outputColumnNames: key, c1, c2, c3, c4
- Statistics: Num rows: 316 Data size: 120080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
table:
@@ -98,7 +98,7 @@ STAGE PLANS:
Reduce Output Operator
null sort order:
sort order:
- Statistics: Num rows: 316 Data size: 120080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: key (type: string), c1 (type: int), c2 (type: string), c3 (type: int), c4 (type: int)
Execution mode: vectorized
Reduce Operator Tree:
diff --git a/ql/src/test/results/clientpositive/groupby3_map.q.out b/ql/src/test/results/clientpositive/groupby3_map.q.out
index af02802..22573cb 100644
--- a/ql/src/test/results/clientpositive/groupby3_map.q.out
+++ b/ql/src/test/results/clientpositive/groupby3_map.q.out
@@ -58,12 +58,12 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 250 Data size: 150000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 125500 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
null sort order: z
sort order: +
- Statistics: Num rows: 250 Data size: 150000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 125500 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: double), _col2 (type: bigint), _col5 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: double)
Reduce Operator Tree:
Group By Operator
diff --git a/ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out
index 1f56188..fbd6e4e 100644
--- a/ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out
@@ -62,12 +62,12 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 250 Data size: 150000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 125500 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
null sort order: z
sort order: +
- Statistics: Num rows: 250 Data size: 150000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 125500 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: double), _col2 (type: bigint), _col5 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: double)
Reduce Operator Tree:
Group By Operator
diff --git a/ql/src/test/results/clientpositive/groupby3_map_skew.q.out b/ql/src/test/results/clientpositive/groupby3_map_skew.q.out
index f315663..c6cccd1 100644
--- a/ql/src/test/results/clientpositive/groupby3_map_skew.q.out
+++ b/ql/src/test/results/clientpositive/groupby3_map_skew.q.out
@@ -59,13 +59,13 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
- Statistics: Num rows: 250 Data size: 348000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 323500 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 348000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 323500 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: double), _col2 (type: struct<count:bigint,sum:double,input:string>), _col4 (type: string), _col5 (type: string), _col6 (type: struct<count:bigint,sum:double,variance:double>), _col7 (type: struct<count:bigint,sum:double,variance:double>), _col8 (type: struct<count:bigint,sum:double,variance:double>), _col9 (type: struct<count:bigint,sum:double,variance:double>)
Reduce Operator Tree:
Group By Operator
diff --git a/ql/src/test/results/clientpositive/groupby4.q.out b/ql/src/test/results/clientpositive/groupby4.q.out
index 7528034..d861e47 100644
--- a/ql/src/test/results/clientpositive/groupby4.q.out
+++ b/ql/src/test/results/clientpositive/groupby4.q.out
@@ -49,7 +49,7 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: partial1
outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
table:
@@ -66,17 +66,17 @@ STAGE PLANS:
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: string)
mode: final
outputColumnNames: _col0
- Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -85,7 +85,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string)
outputColumnNames: c1
- Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
table:
@@ -119,7 +119,7 @@ STAGE PLANS:
null sort order:
sort order:
Map-reduce partition columns: rand() (type: double)
- Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: c1 (type: string)
Execution mode: vectorized
Reduce Operator Tree:
diff --git a/ql/src/test/results/clientpositive/groupby4_noskew.q.out b/ql/src/test/results/clientpositive/groupby4_noskew.q.out
index 6550017..842bda3 100644
--- a/ql/src/test/results/clientpositive/groupby4_noskew.q.out
+++ b/ql/src/test/results/clientpositive/groupby4_noskew.q.out
@@ -47,10 +47,10 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: complete
outputColumnNames: _col0
- Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -59,7 +59,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string)
outputColumnNames: c1
- Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
table:
@@ -92,7 +92,7 @@ STAGE PLANS:
Reduce Output Operator
null sort order:
sort order:
- Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: c1 (type: string)
Execution mode: vectorized
Reduce Operator Tree:
diff --git a/ql/src/test/results/clientpositive/groupby6.q.out b/ql/src/test/results/clientpositive/groupby6.q.out
index be673c5..a72afb2 100644
--- a/ql/src/test/results/clientpositive/groupby6.q.out
+++ b/ql/src/test/results/clientpositive/groupby6.q.out
@@ -49,7 +49,7 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: partial1
outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
table:
@@ -66,17 +66,17 @@ STAGE PLANS:
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: string)
mode: final
outputColumnNames: _col0
- Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -85,7 +85,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string)
outputColumnNames: c1
- Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
table:
@@ -119,7 +119,7 @@ STAGE PLANS:
null sort order:
sort order:
Map-reduce partition columns: rand() (type: double)
- Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: c1 (type: string)
Execution mode: vectorized
Reduce Operator Tree:
diff --git a/ql/src/test/results/clientpositive/groupby6_map.q.out b/ql/src/test/results/clientpositive/groupby6_map.q.out
index ed812ce..9f8c791 100644
--- a/ql/src/test/results/clientpositive/groupby6_map.q.out
+++ b/ql/src/test/results/clientpositive/groupby6_map.q.out
@@ -40,23 +40,23 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 250 Data size: 46000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 46000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 250 Data size: 46000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 250 Data size: 46000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -65,7 +65,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string)
outputColumnNames: c1
- Statistics: Num rows: 250 Data size: 46000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(c1, 'hll')
minReductionHashAggr: 0.99
diff --git a/ql/src/test/results/clientpositive/groupby6_map_skew.q.out b/ql/src/test/results/clientpositive/groupby6_map_skew.q.out
index 57f8c3f..43fbc84 100644
--- a/ql/src/test/results/clientpositive/groupby6_map_skew.q.out
+++ b/ql/src/test/results/clientpositive/groupby6_map_skew.q.out
@@ -41,20 +41,20 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 250 Data size: 46000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
null sort order: z
sort order: +
Map-reduce partition columns: rand() (type: double)
- Statistics: Num rows: 250 Data size: 46000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: string)
mode: partials
outputColumnNames: _col0
- Statistics: Num rows: 250 Data size: 46000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
table:
@@ -71,17 +71,17 @@ STAGE PLANS:
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 46000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: string)
mode: final
outputColumnNames: _col0
- Statistics: Num rows: 250 Data size: 46000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 250 Data size: 46000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -90,7 +90,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string)
outputColumnNames: c1
- Statistics: Num rows: 250 Data size: 46000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(c1, 'hll')
minReductionHashAggr: 0.99
diff --git a/ql/src/test/results/clientpositive/groupby6_noskew.q.out b/ql/src/test/results/clientpositive/groupby6_noskew.q.out
index 689775e..9545ca3 100644
--- a/ql/src/test/results/clientpositive/groupby6_noskew.q.out
+++ b/ql/src/test/results/clientpositive/groupby6_noskew.q.out
@@ -47,10 +47,10 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: complete
outputColumnNames: _col0
- Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -59,7 +59,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string)
outputColumnNames: c1
- Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
table:
@@ -92,7 +92,7 @@ STAGE PLANS:
Reduce Output Operator
null sort order:
sort order:
- Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: c1 (type: string)
Execution mode: vectorized
Reduce Operator Tree:
diff --git a/ql/src/test/results/clientpositive/groupby8_map_skew.q.out b/ql/src/test/results/clientpositive/groupby8_map_skew.q.out
index 8191963..66719b3 100644
--- a/ql/src/test/results/clientpositive/groupby8_map_skew.q.out
+++ b/ql/src/test/results/clientpositive/groupby8_map_skew.q.out
@@ -59,13 +59,13 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 45250 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 45250 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: key, value
@@ -76,7 +76,7 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 45250 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
table:
@@ -204,7 +204,7 @@ STAGE PLANS:
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 45250 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized
Reduce Operator Tree:
Group By Operator
diff --git a/ql/src/test/results/clientpositive/groupby9.q.out b/ql/src/test/results/clientpositive/groupby9.q.out
index d46ca78..0170f97 100644
--- a/ql/src/test/results/clientpositive/groupby9.q.out
+++ b/ql/src/test/results/clientpositive/groupby9.q.out
@@ -57,13 +57,13 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 45250 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 45250 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: key, value
@@ -74,7 +74,7 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 500 Data size: 185000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 136000 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
table:
@@ -176,7 +176,7 @@ STAGE PLANS:
null sort order: zzz
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 500 Data size: 185000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 136000 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized
Reduce Operator Tree:
Group By Operator
@@ -944,13 +944,13 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 45250 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 45250 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: key, value
@@ -961,7 +961,7 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 500 Data size: 185000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 136000 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
table:
@@ -1063,7 +1063,7 @@ STAGE PLANS:
null sort order: zzz
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 500 Data size: 185000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 136000 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized
Reduce Operator Tree:
Group By Operator
@@ -1831,13 +1831,13 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 45250 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 45250 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: key, value
@@ -1848,7 +1848,7 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 500 Data size: 185000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 136000 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
table:
@@ -1950,7 +1950,7 @@ STAGE PLANS:
null sort order: zzz
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 500 Data size: 185000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 136000 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized
Reduce Operator Tree:
Group By Operator
@@ -3608,13 +3608,13 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 45250 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 45250 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: key, value
@@ -3625,7 +3625,7 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 500 Data size: 185000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 136000 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
table:
@@ -3727,7 +3727,7 @@ STAGE PLANS:
null sort order: zzz
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 500 Data size: 185000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 136000 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized
Reduce Operator Tree:
Group By Operator
diff --git a/ql/src/test/results/clientpositive/groupby_map_ppr.q.out b/ql/src/test/results/clientpositive/groupby_map_ppr.q.out
index afdb705..952f310 100644
--- a/ql/src/test/results/clientpositive/groupby_map_ppr.q.out
+++ b/ql/src/test/results/clientpositive/groupby_map_ppr.q.out
@@ -57,13 +57,13 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 93500 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 93500 Basic stats: COMPLETE Column stats: COMPLETE
tag: -1
value expressions: _col3 (type: double)
auto parallelism: false
@@ -178,17 +178,17 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 316 Data size: 63200 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 31916 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 316 Data size: 117552 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
GlobalTableId: 1
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 316 Data size: 117552 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.TextInputFormat
@@ -219,7 +219,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
outputColumnNames: key, c1, c2
- Statistics: Num rows: 316 Data size: 117552 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll')
minReductionHashAggr: 0.99
diff --git a/ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out
index a0a0ea4..bd43f54 100644
--- a/ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out
@@ -57,13 +57,13 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 1000 Data size: 491000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 294000 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
null sort order: zzz
sort order: +++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1000 Data size: 491000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 294000 Basic stats: COMPLETE Column stats: COMPLETE
tag: -1
value expressions: _col4 (type: double)
auto parallelism: false
@@ -178,17 +178,17 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 316 Data size: 68256 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 36972 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 316 Data size: 120080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
GlobalTableId: 1
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 316 Data size: 120080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.TextInputFormat
@@ -219,7 +219,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int)
outputColumnNames: key, c1, c2, c3, c4
- Statistics: Num rows: 316 Data size: 120080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll')
minReductionHashAggr: 0.99
diff --git a/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out b/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out
index 8fa21f2..756c179 100644
--- a/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out
+++ b/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out
@@ -94,14 +94,14 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: complete
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 316 Data size: 68256 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 36972 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 316 Data size: 120080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 316 Data size: 120080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -109,20 +109,20 @@ STAGE PLANS:
name: default.dest_g4
Filter Operator
predicate: (KEY._col0 >= 5) (type: boolean)
- Statistics: Num rows: 166 Data size: 45650 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 166 Data size: 29216 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0)
keys: KEY._col0 (type: string)
mode: complete
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 105 Data size: 22680 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 12285 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 105 Data size: 39900 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 29505 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 105 Data size: 39900 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 29505 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -130,20 +130,20 @@ STAGE PLANS:
name: default.dest_g2
Filter Operator
predicate: (KEY._col0 < 5) (type: boolean)
- Statistics: Num rows: 166 Data size: 45650 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 166 Data size: 29216 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0)
keys: KEY._col0 (type: string)
mode: complete
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 105 Data size: 22680 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 12285 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 105 Data size: 39900 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 29505 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 105 Data size: 39900 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 29505 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -349,14 +349,14 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: complete
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 316 Data size: 68256 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 36972 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 316 Data size: 120080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 316 Data size: 120080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -364,20 +364,20 @@ STAGE PLANS:
name: default.dest_g4
Filter Operator
predicate: (KEY._col0 >= 5) (type: boolean)
- Statistics: Num rows: 166 Data size: 45650 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 166 Data size: 29216 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0)
keys: KEY._col0 (type: string)
mode: complete
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 105 Data size: 22680 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 12285 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 105 Data size: 39900 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 29505 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 105 Data size: 39900 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 29505 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -385,20 +385,20 @@ STAGE PLANS:
name: default.dest_g2
Filter Operator
predicate: (KEY._col0 < 5) (type: boolean)
- Statistics: Num rows: 166 Data size: 45650 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 166 Data size: 29216 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0)
keys: KEY._col0 (type: string)
mode: complete
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 105 Data size: 22680 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 12285 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 105 Data size: 39900 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 29505 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 105 Data size: 39900 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 29505 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -467,11 +467,11 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: complete
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 500 Data size: 196000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col2 (type: bigint), concat(_col0, _col3) (type: string), _col3 (type: double), _col4 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 500 Data size: 196000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 146500 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
table:
@@ -480,20 +480,20 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Filter Operator
predicate: (KEY._col0 >= 5) (type: boolean)
- Statistics: Num rows: 166 Data size: 76194 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 166 Data size: 43326 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(DISTINCT KEY._col2:0._col0), sum(KEY._col2:0._col0), count(VALUE._col0)
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: complete
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 166 Data size: 65072 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 166 Data size: 32204 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), UDFToInteger(_col2) (type: int), concat(_col0, _col3) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 166 Data size: 63080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 166 Data size: 46646 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 166 Data size: 63080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 166 Data size: 46646 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -508,7 +508,7 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: bigint)
null sort order: zz
sort order: ++
- Statistics: Num rows: 500 Data size: 196000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 146500 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
value expressions: _col2 (type: string), _col3 (type: double), _col4 (type: bigint)
Execution mode: vectorized
@@ -516,17 +516,17 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: bigint), VALUE._col0 (type: string), VALUE._col1 (type: double), VALUE._col2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 500 Data size: 196000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 146500 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 10
- Statistics: Num rows: 10 Data size: 3920 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 2930 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 10 Data size: 3800 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 2810 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 10 Data size: 3800 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 2810 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
diff --git a/ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out b/ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out
index e4bc26e..d151470 100644
--- a/ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out
+++ b/ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out
@@ -63,20 +63,20 @@ STAGE PLANS:
Statistics: Num rows: 332 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (KEY._col0 >= 5) (type: boolean)
- Statistics: Num rows: 110 Data size: 30250 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 110 Data size: 19360 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(DISTINCT KEY._col1:0._col0)
keys: KEY._col0 (type: string)
mode: complete
outputColumnNames: _col0, _col1
- Statistics: Num rows: 105 Data size: 20160 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 9765 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), UDFToInteger(_col1) (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 105 Data size: 19740 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 9345 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 105 Data size: 19740 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 9345 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -85,7 +85,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: int)
outputColumnNames: key, c1
- Statistics: Num rows: 105 Data size: 19740 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 9345 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll')
minReductionHashAggr: 0.99
@@ -100,20 +100,20 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Filter Operator
predicate: (KEY._col0 < 5) (type: boolean)
- Statistics: Num rows: 110 Data size: 30250 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 110 Data size: 19360 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(DISTINCT KEY._col1:0._col0), count(VALUE._col0)
keys: KEY._col0 (type: string)
mode: complete
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 105 Data size: 21000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 10605 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 105 Data size: 20160 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 9765 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 105 Data size: 20160 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 9765 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -122,7 +122,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int)
outputColumnNames: key, c1, c2
- Statistics: Num rows: 105 Data size: 20160 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 9765 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll')
minReductionHashAggr: 0.99
diff --git a/ql/src/test/results/clientpositive/groupby_nocolumnalign.q.out b/ql/src/test/results/clientpositive/groupby_nocolumnalign.q.out
index 19ae138..46d34fb 100644
--- a/ql/src/test/results/clientpositive/groupby_nocolumnalign.q.out
+++ b/ql/src/test/results/clientpositive/groupby_nocolumnalign.q.out
@@ -41,13 +41,13 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 500 Data size: 185000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 136000 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
null sort order: zzz
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 500 Data size: 185000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 136000 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Operator Tree:
Group By Operator
aggregations: count(DISTINCT KEY._col2:0._col0)
diff --git a/ql/src/test/results/clientpositive/groupby_position.q.out b/ql/src/test/results/clientpositive/groupby_position.q.out
index f52623a..af0c243 100644
--- a/ql/src/test/results/clientpositive/groupby_position.q.out
+++ b/ql/src/test/results/clientpositive/groupby_position.q.out
@@ -56,13 +56,13 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 15023 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 15023 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key < 20) (type: boolean)
Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
@@ -72,7 +72,7 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 166 Data size: 61420 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 22576 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
table:
@@ -174,7 +174,7 @@ STAGE PLANS:
null sort order: zzz
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 166 Data size: 61420 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 22576 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized
Reduce Operator Tree:
Group By Operator
@@ -182,14 +182,14 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 166 Data size: 30876 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), CAST( _col2 AS STRING) (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 166 Data size: 46314 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 166 Data size: 46314 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -198,7 +198,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
outputColumnNames: key, val1, val2
- Statistics: Num rows: 166 Data size: 46314 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll')
minReductionHashAggr: 0.99
@@ -349,13 +349,13 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 15023 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 15023 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key < 20) (type: boolean)
Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
@@ -365,7 +365,7 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 166 Data size: 61420 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 22576 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
table:
@@ -467,7 +467,7 @@ STAGE PLANS:
null sort order: zzz
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 166 Data size: 61420 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 22576 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized
Reduce Operator Tree:
Group By Operator
@@ -475,14 +475,14 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 166 Data size: 30876 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: UDFToInteger(_col1) (type: int), _col0 (type: string), CAST( _col2 AS STRING) (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 166 Data size: 46314 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 166 Data size: 46314 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -491,7 +491,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
outputColumnNames: key, val1, val2
- Statistics: Num rows: 166 Data size: 46314 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll')
minReductionHashAggr: 0.99
diff --git a/ql/src/test/results/clientpositive/groupby_ppr.q.out b/ql/src/test/results/clientpositive/groupby_ppr.q.out
index 085ac2c..d7549d9 100644
--- a/ql/src/test/results/clientpositive/groupby_ppr.q.out
+++ b/ql/src/test/results/clientpositive/groupby_ppr.q.out
@@ -171,17 +171,17 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: complete
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 316 Data size: 63200 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 31916 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 316 Data size: 117552 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
GlobalTableId: 1
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 316 Data size: 117552 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.TextInputFormat
@@ -212,7 +212,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
outputColumnNames: key, c1, c2
- Statistics: Num rows: 316 Data size: 117552 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
GlobalTableId: 0
@@ -279,7 +279,7 @@ STAGE PLANS:
Reduce Output Operator
null sort order:
sort order:
- Statistics: Num rows: 316 Data size: 117552 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE
tag: -1
value expressions: key (type: string), c1 (type: int), c2 (type: string)
auto parallelism: false
diff --git a/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out
index 0a7275f..95f95b0 100644
--- a/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out
@@ -171,17 +171,17 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: complete
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 316 Data size: 68256 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 36972 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 316 Data size: 120080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
GlobalTableId: 1
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 316 Data size: 120080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.TextInputFormat
@@ -212,7 +212,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int)
outputColumnNames: key, c1, c2, c3, c4
- Statistics: Num rows: 316 Data size: 120080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
GlobalTableId: 0
@@ -279,7 +279,7 @@ STAGE PLANS:
Reduce Output Operator
null sort order:
sort order:
- Statistics: Num rows: 316 Data size: 120080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE
tag: -1
value expressions: key (type: string), c1 (type: int), c2 (type: string), c3 (type: int), c4 (type: int)
auto parallelism: false
diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out
index ed969fa..9b869a1 100644
--- a/ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out
+++ b/ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out
@@ -419,15 +419,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
- Stage-4 depends on stages: Stage-0, Stage-2
- Stage-3 depends on stages: Stage-1
- Stage-9 depends on stages: Stage-3 , consists of Stage-6, Stage-5, Stage-7
- Stage-6
- Stage-0 depends on stages: Stage-6, Stage-5, Stage-8
+ Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
+ Stage-4
+ Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
+ Stage-2 depends on stages: Stage-0, Stage-8
+ Stage-3
Stage-5
- Stage-7
- Stage-8 depends on stages: Stage-7
+ Stage-6 depends on stages: Stage-5
+ Stage-8 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
@@ -466,100 +465,37 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), CAST( _col1 AS STRING) (type: string), if(((UDFToDouble(_col0) % 100.0D) = 0.0D), '11', '12') (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 316 Data size: 143780 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 112812 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 316 Data size: 112812 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.test_table_n8
Select Operator
expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string)
outputColumnNames: key, value, ds, hr
- Statistics: Num rows: 316 Data size: 173484 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 142516 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
keys: ds (type: string), hr (type: string)
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 158 Data size: 182964 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-2
- Map Reduce
- Map Operator Tree:
- TableScan
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- null sort order: zz
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 158 Data size: 182964 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
- Execution mode: vectorized
- Reduce Operator Tree:
- Group By Operator
- aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
- keys: KEY._col0 (type: string), KEY._col1 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 158 Data size: 182964 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string), _col1 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 158 Data size: 182964 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 158 Data size: 182964 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-4
- Stats Work
- Basic Stats Work:
- Column Stats Desc:
- Columns: key, value
- Column Types: string, string
- Table: default.test_table_n8
- Stage: Stage-3
- Map Reduce
- Map Operator Tree:
- TableScan
- Reduce Output Operator
- key expressions: _col2 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col2 (type: string)
- Statistics: Num rows: 316 Data size: 143780 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: string), _col1 (type: string)
- Execution mode: vectorized
- Reduce Operator Tree:
- Select Operator
- expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string)
- outputColumnNames: _col0, _col1, _col2
- File Output Operator
- compressed: false
- Dp Sort State: PARTITION_SORTED
- Statistics: Num rows: 316 Data size: 143780 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
- name: default.test_table_n8
-
- Stage: Stage-9
+ Stage: Stage-7
Conditional Operator
- Stage: Stage-6
+ Stage: Stage-4
Move Operator
files:
hdfs directory: true
@@ -578,26 +514,65 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
name: default.test_table_n8
- Stage: Stage-5
+ Stage: Stage-2
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: string, string
+ Table: default.test_table_n8
+
+ Stage: Stage-3
Merge File Operator
Map Operator Tree:
RCFile Merge Operator
merge level: block
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
- Stage: Stage-7
+ Stage: Stage-5
Merge File Operator
Map Operator Tree:
RCFile Merge Operator
merge level: block
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
- Stage: Stage-8
+ Stage: Stage-6
Move Operator
files:
hdfs directory: true
#### A masked pattern was here ####
+ Stage: Stage-8
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
PREHOOK: query: INSERT OVERWRITE TABLE test_table_n8 PARTITION (ds = '2008-04-08', hr)
SELECT key, value, IF (key % 100 == 0, '11', '12') FROM
(SELECT key, COUNT(*) AS value FROM srcpart
diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_6.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_6.q.out
index f88f5bf..3ec1d32 100644
--- a/ql/src/test/results/clientpositive/list_bucket_dml_6.q.out
+++ b/ql/src/test/results/clientpositive/list_bucket_dml_6.q.out
@@ -49,14 +49,14 @@ STAGE PLANS:
Select Operator
expressions: key (type: string), value (type: string), if(((UDFToDouble(key) % 100.0D) = 0.0D), 'a1', 'b1') (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 264000 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
GlobalTableId: 1
#### A masked pattern was here ####
NumFilesPerFileSink: 1
Static Partition Specification: ds=2008-04-08/
- Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 264000 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -84,20 +84,20 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string)
outputColumnNames: key, value, ds, hr
- Statistics: Num rows: 1000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
keys: ds (type: string), hr (type: string)
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
tag: -1
value expressions: _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
auto parallelism: false
@@ -212,17 +212,17 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -430,14 +430,14 @@ STAGE PLANS:
Select Operator
expressions: key (type: string), value (type: string), if(((UDFToDouble(key) % 100.0D) = 0.0D), 'a1', 'b1') (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 264000 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
GlobalTableId: 1
#### A masked pattern was here ####
NumFilesPerFileSink: 1
Static Partition Specification: ds=2008-04-08/
- Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 264000 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -465,20 +465,20 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string)
outputColumnNames: key, value, ds, hr
- Statistics: Num rows: 1000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
keys: ds (type: string), hr (type: string)
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
tag: -1
value expressions: _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
auto parallelism: false
@@ -593,17 +593,17 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_7.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_7.q.out
index 43804d6..c41ae71 100644
--- a/ql/src/test/results/clientpositive/list_bucket_dml_7.q.out
+++ b/ql/src/test/results/clientpositive/list_bucket_dml_7.q.out
@@ -49,14 +49,14 @@ STAGE PLANS:
Select Operator
expressions: key (type: string), value (type: string), if(((UDFToDouble(key) % 100.0D) = 0.0D), 'a1', 'b1') (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 264000 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
GlobalTableId: 1
#### A masked pattern was here ####
NumFilesPerFileSink: 1
Static Partition Specification: ds=2008-04-08/
- Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 264000 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -84,20 +84,20 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string)
outputColumnNames: key, value, ds, hr
- Statistics: Num rows: 1000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
keys: ds (type: string), hr (type: string)
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
tag: -1
value expressions: _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
auto parallelism: false
@@ -212,17 +212,17 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -430,14 +430,14 @@ STAGE PLANS:
Select Operator
expressions: key (type: string), value (type: string), if(((UDFToDouble(key) % 100.0D) = 0.0D), 'a1', 'b1') (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 264000 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
GlobalTableId: 1
#### A masked pattern was here ####
NumFilesPerFileSink: 1
Static Partition Specification: ds=2008-04-08/
- Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 264000 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -465,20 +465,20 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string)
outputColumnNames: key, value, ds, hr
- Statistics: Num rows: 1000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
keys: ds (type: string), hr (type: string)
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
tag: -1
value expressions: _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
auto parallelism: false
@@ -593,17 +593,17 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out
index 8551f57..2b82c86 100644
--- a/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out
+++ b/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out
@@ -49,14 +49,14 @@ STAGE PLANS:
Select Operator
expressions: key (type: string), value (type: string), if(((UDFToDouble(key) % 100.0D) = 0.0D), 'a1', 'b1') (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 264000 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
GlobalTableId: 1
#### A masked pattern was here ####
NumFilesPerFileSink: 1
Static Partition Specification: ds=2008-04-08/
- Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 264000 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -84,20 +84,20 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string)
outputColumnNames: key, value, ds, hr
- Statistics: Num rows: 1000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
keys: ds (type: string), hr (type: string)
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
tag: -1
value expressions: _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
auto parallelism: false
@@ -212,17 +212,17 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
diff --git a/ql/src/test/results/clientpositive/llap/count_dist_rewrite.q.out b/ql/src/test/results/clientpositive/llap/count_dist_rewrite.q.out
index 2276b99..81069fd 100644
--- a/ql/src/test/results/clientpositive/llap/count_dist_rewrite.q.out
+++ b/ql/src/test/results/clientpositive/llap/count_dist_rewrite.q.out
@@ -763,13 +763,13 @@ STAGE PLANS:
minReductionHashAggr: 0.5
mode: hash
outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6, _col7
- Statistics: Num rows: 250 Data size: 146000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 121500 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 146000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 121500 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: double), _col2 (type: bigint), _col4 (type: string), _col5 (type: string), _col6 (type: double), _col7 (type: double)
Execution mode: llap
LLAP IO: no inputs
@@ -781,7 +781,7 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: partial2
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 250 Data size: 146000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 121500 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: sum(_col1), count(_col2), count(_col0), max(_col3), min(_col4), sum(_col5), sum(_col6)
mode: partial2
diff --git a/ql/src/test/results/clientpositive/llap/explainuser_2.q.out b/ql/src/test/results/clientpositive/llap/explainuser_2.q.out
index 738bf44..3613e12 100644
--- a/ql/src/test/results/clientpositive/llap/explainuser_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/explainuser_2.q.out
@@ -3564,7 +3564,7 @@ Stage-4
<-Reducer 4 [SIMPLE_EDGE] llap
PARTITION_ONLY_SHUFFLE [RS_17]
PartitionCols:_col0
- Group By Operator [GBY_16] (rows=1 width=280)
+ Group By Operator [GBY_16] (rows=1 width=275)
Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5)
Group By Operator [GBY_13] (rows=1 width=272)
Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
@@ -3612,7 +3612,7 @@ Stage-4
Output:["key","val1","val2"]
Select Operator [SEL_33] (rows=1 width=456)
Output:["_col0","_col1","_col2"]
- Group By Operator [GBY_32] (rows=1 width=464)
+ Group By Operator [GBY_32] (rows=1 width=459)
Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1
Please refer to the previous Group By Operator [GBY_13]
Stage-5
@@ -3787,7 +3787,7 @@ Stage-4
<-Map 8 [CONTAINS] llap
Reduce Output Operator [RS_52]
PartitionCols:_col0
- Group By Operator [GBY_50] (rows=1 width=280)
+ Group By Operator [GBY_50] (rows=1 width=275)
Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5)
Select Operator [SEL_48] (rows=500 width=10)
Output:["_col0","_col1"]
@@ -3795,13 +3795,13 @@ Stage-4
Output:["key","value"]
Reduce Output Operator [RS_53]
PartitionCols:_col0, _col1
- Group By Operator [GBY_51] (rows=1 width=464)
+ Group By Operator [GBY_51] (rows=1 width=459)
Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5)
Please refer to the previous Select Operator [SEL_48]
<-Map 9 [CONTAINS] llap
Reduce Output Operator [RS_59]
PartitionCols:_col0
- Group By Operator [GBY_57] (rows=1 width=280)
+ Group By Operator [GBY_57] (rows=1 width=275)
Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5)
Select Operator [SEL_55] (rows=500 width=10)
Output:["_col0","_col1"]
@@ -3809,13 +3809,13 @@ Stage-4
Output:["key","value"]
Reduce Output Operator [RS_60]
PartitionCols:_col0, _col1
- Group By Operator [GBY_58] (rows=1 width=464)
+ Group By Operator [GBY_58] (rows=1 width=459)
Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5)
Please refer to the previous Select Operator [SEL_55]
<-Reducer 2 [CONTAINS] llap
Reduce Output Operator [RS_45]
PartitionCols:_col0
- Group By Operator [GBY_43] (rows=1 width=280)
+ Group By Operator [GBY_43] (rows=1 width=275)
Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5)
Select Operator [SEL_41] (rows=1 width=272)
Output:["_col0","_col1"]
@@ -3830,7 +3830,7 @@ Stage-4
default@src,s1,Tbl:COMPLETE,Col:COMPLETE
Reduce Output Operator [RS_46]
PartitionCols:_col0, _col1
- Group By Operator [GBY_44] (rows=1 width=464)
+ Group By Operator [GBY_44] (rows=1 width=459)
Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5)
Please refer to the previous Select Operator [SEL_41]
PARTITION_ONLY_SHUFFLE [RS_22]
@@ -3923,7 +3923,7 @@ Stage-4
<-Map 8 [CONTAINS] llap
Reduce Output Operator [RS_50]
PartitionCols:_col0
- Group By Operator [GBY_48] (rows=1 width=280)
+ Group By Operator [GBY_48] (rows=1 width=275)
Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5)
Select Operator [SEL_46] (rows=500 width=10)
Output:["_col0","_col1"]
@@ -3931,13 +3931,13 @@ Stage-4
Output:["key","value"]
Reduce Output Operator [RS_51]
PartitionCols:_col0, _col1
- Group By Operator [GBY_49] (rows=1 width=464)
+ Group By Operator [GBY_49] (rows=1 width=459)
Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5)
Please refer to the previous Select Operator [SEL_46]
<-Reducer 2 [CONTAINS] llap
Reduce Output Operator [RS_43]
PartitionCols:_col0
- Group By Operator [GBY_41] (rows=1 width=280)
+ Group By Operator [GBY_41] (rows=1 width=275)
Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5)
Select Operator [SEL_39] (rows=1 width=272)
Output:["_col0","_col1"]
@@ -3952,7 +3952,7 @@ Stage-4
default@src,s1,Tbl:COMPLETE,Col:COMPLETE
Reduce Output Operator [RS_44]
PartitionCols:_col0, _col1
- Group By Operator [GBY_42] (rows=1 width=464)
+ Group By Operator [GBY_42] (rows=1 width=459)
Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5)
Please refer to the previous Select Operator [SEL_39]
PARTITION_ONLY_SHUFFLE [RS_20]
diff --git a/ql/src/test/results/clientpositive/llap/groupby2.q.out b/ql/src/test/results/clientpositive/llap/groupby2.q.out
index 848b503..bdad787 100644
--- a/ql/src/test/results/clientpositive/llap/groupby2.q.out
+++ b/ql/src/test/results/clientpositive/llap/groupby2.q.out
@@ -60,13 +60,13 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: partial1
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 500 Data size: 100000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 50500 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 100000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 50500 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint), _col2 (type: double)
Reducer 3
Execution mode: llap
@@ -76,14 +76,14 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: final
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 316 Data size: 63200 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 31916 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 316 Data size: 117552 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 316 Data size: 117552 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -92,12 +92,12 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
outputColumnNames: key, c1, c2
- Statistics: Num rows: 316 Data size: 117552 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
null sort order:
sort order:
Map-reduce partition columns: rand() (type: double)
- Statistics: Num rows: 316 Data size: 117552 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: key (type: string), c1 (type: int), c2 (type: string)
Reducer 4
Execution mode: llap
diff --git a/ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out b/ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out
index 7b6bd1f..54dc91e 100644
--- a/ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out
+++ b/ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out
@@ -103,26 +103,26 @@ STAGE PLANS:
minReductionHashAggr: 0.0
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 137500 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 137500 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(DISTINCT substr(_col1, 5))
keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string)
minReductionHashAggr: 0.0
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1001 Data size: 459459 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
null sort order: zzz
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1001 Data size: 459459 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Map 9
@@ -140,26 +140,26 @@ STAGE PLANS:
minReductionHashAggr: 0.0
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 137500 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 137500 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(DISTINCT substr(_col1, 5))
keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string)
minReductionHashAggr: 0.0
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1001 Data size: 459459 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
null sort order: zzz
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1001 Data size: 459459 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Reducer 2
@@ -180,26 +180,26 @@ STAGE PLANS:
minReductionHashAggr: 0.0
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 137500 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 137500 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(DISTINCT substr(_col1, 5))
keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string)
minReductionHashAggr: 0.0
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1001 Data size: 459459 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
null sort order: zzz
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1001 Data size: 459459 Basic stats: COMPLETE Column stats: COMPLETE
Reducer 4
Execution mode: llap
Reduce Operator Tree:
@@ -1082,26 +1082,26 @@ STAGE PLANS:
minReductionHashAggr: 0.0
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 137500 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 137500 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(DISTINCT substr(_col1, 5))
keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string)
minReductionHashAggr: 0.0
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1001 Data size: 459459 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
null sort order: zzz
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1001 Data size: 459459 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Map 7
@@ -1139,26 +1139,26 @@ STAGE PLANS:
minReductionHashAggr: 0.0
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 137500 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 137500 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(DISTINCT substr(_col1, 5))
keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string)
minReductionHashAggr: 0.0
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1001 Data size: 459459 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
null sort order: zzz
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1001 Data size: 459459 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Reducer 3
@@ -1281,26 +1281,26 @@ STAGE PLANS:
minReductionHashAggr: 0.0
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 137500 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 137500 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(DISTINCT substr(_col1, 5))
keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string)
minReductionHashAggr: 0.0
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1001 Data size: 459459 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
null sort order: zzz
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1001 Data size: 459459 Basic stats: COMPLETE Column stats: COMPLETE
Union 2
Vertex: Union 2
@@ -2077,26 +2077,26 @@ STAGE PLANS:
minReductionHashAggr: 0.0
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 137500 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 137500 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(DISTINCT substr(_col1, 5))
keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string)
minReductionHashAggr: 0.0
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1001 Data size: 459459 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
null sort order: zzz
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1001 Data size: 459459 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Map 7
@@ -2134,26 +2134,26 @@ STAGE PLANS:
minReductionHashAggr: 0.0
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 137500 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 137500 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(DISTINCT substr(_col1, 5))
keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string)
minReductionHashAggr: 0.0
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1001 Data size: 459459 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
null sort order: zzz
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1001 Data size: 459459 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Reducer 3
@@ -2276,26 +2276,26 @@ STAGE PLANS:
minReductionHashAggr: 0.0
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 137500 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 137500 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(DISTINCT substr(_col1, 5))
keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string)
minReductionHashAggr: 0.0
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1001 Data size: 459459 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
null sort order: zzz
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1001 Data size: 459459 Basic stats: COMPLETE Column stats: COMPLETE
Union 2
Vertex: Union 2
@@ -3083,26 +3083,26 @@ STAGE PLANS:
minReductionHashAggr: 0.0
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 70000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 68750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 70000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 68750 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(DISTINCT substr(_col1, 5))
keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string)
minReductionHashAggr: 0.0
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 501 Data size: 232464 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 501 Data size: 229959 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
null sort order: zzz
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 501 Data size: 232464 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 501 Data size: 229959 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Reducer 2
@@ -3123,26 +3123,26 @@ STAGE PLANS:
minReductionHashAggr: 0.0
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 70000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 68750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 70000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 68750 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(DISTINCT substr(_col1, 5))
keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string)
minReductionHashAggr: 0.0
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 501 Data size: 232464 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 501 Data size: 229959 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
null sort order: zzz
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 501 Data size: 232464 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 501 Data size: 229959 Basic stats: COMPLETE Column stats: COMPLETE
Reducer 4
Execution mode: llap
Reduce Operator Tree:
@@ -4072,19 +4072,19 @@ STAGE PLANS:
minReductionHashAggr: 0.0
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 125 Data size: 35000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 125 Data size: 34375 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 125 Data size: 35000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 125 Data size: 34375 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(DISTINCT substr(_col1, 5))
keys: _col0 (type: string), _col1 (type: string)
mode: complete
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 116000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 114750 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), CAST( _col2 AS STRING) (type: string)
outputColumnNames: _col0, _col1, _col2
diff --git a/ql/src/test/results/clientpositive/llap/udf_coalesce.q.out b/ql/src/test/results/clientpositive/llap/udf_coalesce.q.out
index f35ec7a..a934fdc 100644
--- a/ql/src/test/results/clientpositive/llap/udf_coalesce.q.out
+++ b/ql/src/test/results/clientpositive/llap/udf_coalesce.q.out
@@ -61,20 +61,39 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Row Limit Per Split: 1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 1 (type: int), 1 (type: int), 2 (type: int), 1 (type: int), 3 (type: int), 4 (type: int), '1' (type: string), '1' (type: string), '2' (type: string), '1' (type: string), '3' (type: string), '4' (type: string), 1 (type: decimal(1,0)), 1 (type: decimal(1,0)), 2 (type: decimal(1,0)), 2 (type: decimal(1,0)), 2 (type: decimal(1,0)), null (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
+ Statistics: Num rows: 500 Data size: 547004 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 500 Data size: 547004 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- TableScan
- alias: src
- Row Limit Per Split: 1
- Select Operator
- expressions: 1 (type: int), 1 (type: int), 2 (type: int), 1 (type: int), 3 (type: int), 4 (type: int), '1' (type: string), '1' (type: string), '2' (type: string), '1' (type: string), '3' (type: string), '4' (type: string), 1 (type: decimal(1,0)), 1 (type: decimal(1,0)), 2 (type: decimal(1,0)), 2 (type: decimal(1,0)), 2 (type: decimal(1,0)), null (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
- ListSink
+ ListSink
PREHOOK: query: SELECT COALESCE(1),
COALESCE(1, 2),
@@ -122,6 +141,53 @@ POSTHOOK: Input: default@src
#### A masked pattern was here ####
1 1 2 1 3 4 1 1 2 1 3 4 1 1 2 2 2 NULL
PREHOOK: query: EXPLAIN
+SELECT COALESCE(key,'x') from src limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT COALESCE(key,'x') from src limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: COALESCE(key,'x') (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: EXPLAIN
SELECT COALESCE(src_thrift.lint[1], 999),
COALESCE(src_thrift.lintstring[0].mystring, '999'),
COALESCE(src_thrift.mstringstring['key_2'], '999')
@@ -138,19 +204,38 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@src_thrift
#### A masked pattern was here ####
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src_thrift
+ Statistics: Num rows: 11 Data size: 39600 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: COALESCE(lint[1],999) (type: int), COALESCE(lintstring[0].mystring,'999') (type: string), COALESCE(mstringstring['key_2'],'999') (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 11 Data size: 39600 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 11 Data size: 39600 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- TableScan
- alias: src_thrift
- Select Operator
- expressions: COALESCE(lint[1],999) (type: int), COALESCE(lintstring[0].mystring,'999') (type: string), COALESCE(mstringstring['key_2'],'999') (type: string)
- outputColumnNames: _col0, _col1, _col2
- ListSink
+ ListSink
PREHOOK: query: SELECT COALESCE(src_thrift.lint[1], 999),
COALESCE(src_thrift.lintstring[0].mystring, '999'),
diff --git a/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out b/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out
index 6c213f0..aa92f46 100644
--- a/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out
@@ -1147,19 +1147,19 @@ STAGE PLANS:
minReductionHashAggr: 0.0
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 125 Data size: 35500 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 125 Data size: 34875 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 125 Data size: 35500 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 125 Data size: 34875 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(DISTINCT substr(_col1, 5))
keys: _col0 (type: string), _col1 (type: string)
mode: complete
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 117000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 115750 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), CAST( _col2 AS STRING) (type: string)
outputColumnNames: _col0, _col1, _col2
diff --git a/ql/src/test/results/clientpositive/llap/vector_case_when_1.q.out b/ql/src/test/results/clientpositive/llap/vector_case_when_1.q.out
index b81c166..7adb53b 100644
--- a/ql/src/test/results/clientpositive/llap/vector_case_when_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_case_when_1.q.out
@@ -216,13 +216,13 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [4, 21, 26, 30, 34, 38, 42, 44, 46, 48, 50, 52, 54, 58, 61, 64, 67]
selectExpressions: VectorUDFAdaptor(CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE ('Huge number') END)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, LongColLessLongScalar(col 4:int, val 100) -> 20:boolean) -> 21:stri [...]
- Statistics: Num rows: 101 Data size: 141804 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 101 Data size: 118604 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 101 Data size: 141804 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 101 Data size: 118604 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -553,13 +553,13 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [4, 24, 33, 40, 44, 48, 52, 54, 56, 58, 60, 62, 64, 68, 71, 74, 77]
selectExpressions: IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 23:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 22:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 21:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprStringScalarStringS [...]
- Statistics: Num rows: 101 Data size: 141804 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 101 Data size: 118604 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 101 Data size: 141804 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 101 Data size: 118604 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -890,13 +890,13 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [4, 27, 39, 48, 52, 57, 62, 64, 66, 71, 76, 78, 80, 84, 87, 90, 93]
selectExpressions: IfExprColumnCondExpr(col 17:boolean, col 18:stringcol 26:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, ConstantVectorExpression(val Single) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 25:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 19:boolean, ConstantVectorExpression(val Two) -> 20:string, IfExprColumnCondExpr(col 21:boolean, col 22:stringcol 24:string)(children: LongColLessLo [...]
- Statistics: Num rows: 101 Data size: 141804 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 101 Data size: 118604 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 101 Data size: 141804 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 101 Data size: 118604 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out b/ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out
index bcc7dd2..cc72f45 100644
--- a/ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out
@@ -151,7 +151,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [1, 3, 9, 14, 18, 22, 24, 27, 32, 38, 2]
selectExpressions: VectorUDFAdaptor(CASE WHEN ((ctimestamp2 <= TIMESTAMP'1800-12-31 00:00:00')) THEN ('1800s or Earlier') WHEN ((ctimestamp2 < TIMESTAMP'1900-01-01 00:00:00')) THEN ('1900s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN ('Early 2010s') ELSE ('Unknown') END)(children: TimestampColLessEqualTimestampScalar [...]
- Statistics: Num rows: 51 Data size: 50745 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 51 Data size: 50345 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: timestamp), _col10 (type: string), _col1 (type: timestamp)
null sort order: zzz
@@ -162,7 +162,7 @@ STAGE PLANS:
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumns: 9:string, 14:string, 18:string, 22:int, 24:string, 27:int, 32:int, 38:date
- Statistics: Num rows: 51 Data size: 50745 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 51 Data size: 50345 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: date)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -204,13 +204,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 2, 3, 4, 5, 6, 7, 8, 9, 10]
- Statistics: Num rows: 51 Data size: 45084 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 51 Data size: 44684 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 51 Data size: 45084 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 51 Data size: 44684 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -441,7 +441,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [1, 3, 12, 21, 28, 32, 34, 37, 42, 48, 2]
selectExpressions: IfExprStringScalarStringGroupColumn(col 5:boolean, val 1800s or Earliercol 11:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val 1900scol 10:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 9:string)(children [...]
- Statistics: Num rows: 51 Data size: 50745 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 51 Data size: 50345 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: timestamp), _col10 (type: string), _col1 (type: timestamp)
null sort order: zzz
@@ -452,7 +452,7 @@ STAGE PLANS:
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumns: 12:string, 21:string, 28:string, 32:int, 34:string, 37:int, 42:int, 48:date
- Statistics: Num rows: 51 Data size: 50745 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 51 Data size: 50345 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: date)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -494,13 +494,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 2, 3, 4, 5, 6, 7, 8, 9, 10]
- Statistics: Num rows: 51 Data size: 45084 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 51 Data size: 44684 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 51 Data size: 45084 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 51 Data size: 44684 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -731,7 +731,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [1, 3, 15, 27, 36, 40, 42, 45, 50, 56, 2]
selectExpressions: IfExprColumnCondExpr(col 5:boolean, col 6:stringcol 14:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00) -> 5:boolean, ConstantVectorExpression(val 1800s or Earlier) -> 6:string, IfExprColumnCondExpr(col 7:boolean, col 8:stringcol 13:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00) -> 7:boolean, ConstantVectorExpression(val 1900s) -> 8:string, IfExprColumnC [...]
- Statistics: Num rows: 51 Data size: 50745 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 51 Data size: 50345 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: timestamp), _col10 (type: string), _col1 (type: timestamp)
null sort order: zzz
@@ -742,7 +742,7 @@ STAGE PLANS:
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumns: 15:string, 27:string, 36:string, 40:int, 42:string, 45:int, 50:int, 56:date
- Statistics: Num rows: 51 Data size: 50745 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 51 Data size: 50345 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: date)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -784,13 +784,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 2, 3, 4, 5, 6, 7, 8, 9, 10]
- Statistics: Num rows: 51 Data size: 45084 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 51 Data size: 44684 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 51 Data size: 45084 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 51 Data size: 44684 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby4.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby4.q.out
index e31fe56..f8d6e2d 100644
--- a/ql/src/test/results/clientpositive/llap/vector_groupby4.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby4.q.out
@@ -108,7 +108,7 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: partial1
outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
null sort order: z
@@ -118,7 +118,7 @@ STAGE PLANS:
className: VectorReduceSinkStringOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE
Reducer 3
Execution mode: vectorized, llap
Reduce Vectorization:
@@ -139,13 +139,13 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: final
outputColumnNames: _col0
- Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -158,7 +158,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0]
- Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
null sort order:
sort order:
@@ -167,7 +167,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: c1 (type: string)
Reducer 4
Execution mode: llap
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby6.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby6.q.out
index 25b6be6..2042f6d 100644
--- a/ql/src/test/results/clientpositive/llap/vector_groupby6.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby6.q.out
@@ -108,7 +108,7 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: partial1
outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
null sort order: z
@@ -118,7 +118,7 @@ STAGE PLANS:
className: VectorReduceSinkStringOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE
Reducer 3
Execution mode: vectorized, llap
Reduce Vectorization:
@@ -139,13 +139,13 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: final
outputColumnNames: _col0
- Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -158,7 +158,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0]
- Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
null sort order:
sort order:
@@ -167,7 +167,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: c1 (type: string)
Reducer 4
Execution mode: llap
diff --git a/ql/src/test/results/clientpositive/llap/vector_if_expr.q.out b/ql/src/test/results/clientpositive/llap/vector_if_expr.q.out
index 5d955bc..ed73482 100644
--- a/ql/src/test/results/clientpositive/llap/vector_if_expr.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_if_expr.q.out
@@ -47,7 +47,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [10, 13]
selectExpressions: IfExprStringScalarStringScalar(col 10:boolean, val first, val second) -> 13:string
- Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 4587 Data size: 426534 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: boolean)
null sort order: z
@@ -56,7 +56,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 4587 Data size: 426534 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -85,13 +85,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 4587 Data size: 426534 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 4587 Data size: 426534 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/vector_nvl.q.out b/ql/src/test/results/clientpositive/llap/vector_nvl.q.out
index 420520e..5346ed4 100644
--- a/ql/src/test/results/clientpositive/llap/vector_nvl.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_nvl.q.out
@@ -147,19 +147,19 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [4, 14]
selectExpressions: VectorCoalesce(columns [4, 13])(children: col 4:float, ConstantVectorExpression(val 1.0) -> 13:float) -> 14:float
- Statistics: Num rows: 12288 Data size: 85848 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 12288 Data size: 73392 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 10
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 10 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 10 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/vector_udf1.q.out b/ql/src/test/results/clientpositive/llap/vector_udf1.q.out
index 956ff47..8ca8a8e 100644
--- a/ql/src/test/results/clientpositive/llap/vector_udf1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_udf1.q.out
@@ -2585,19 +2585,19 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [9, 10, 13]
selectExpressions: StringSubstrColStartLen(col 1:string, start 0, length 3) -> 9:string, StringSubstrColStartLen(col 3:varchar(20), start 0, length 3) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: StringSubstrColStartLen(col 1:string, start 0, length 3) -> 11:string, StringSubstrColStartLen(col 3:varchar(20), start 0, length 3) -> 12:string) -> 13:boolean
- Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 1
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_string_funcs.q.out b/ql/src/test/results/clientpositive/llap/vectorized_string_funcs.q.out
index 29e35ec..12e5037 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_string_funcs.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_string_funcs.q.out
@@ -71,10 +71,10 @@ STAGE PLANS:
Select Operator
expressions: substr(cstring1, 1, 2) (type: string), substr(cstring1, 2) (type: string), lower(cstring1) (type: string), upper(cstring1) (type: string), upper(cstring1) (type: string), length(cstring1) (type: int), trim(cstring1) (type: string), ltrim(cstring1) (type: string), rtrim(cstring1) (type: string), concat(cstring1, cstring2) (type: string), concat('>', cstring1) (type: string), concat(cstring1, '<') (type: string), concat(substr(cstring1, 1, 2), substr(cstr [...]
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1024 Data size: 2265088 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1024 Data size: 2024426 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1024 Data size: 2265088 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1024 Data size: 2024426 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
index 0077f08..0e0058b 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
@@ -274,7 +274,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 0, 1, 3, 13, 14, 15, 16, 17]
selectExpressions: VectorUDFUnixTimeStampTimestamp(col 1:timestamp) -> 5:bigint, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 6:int, VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 7:int, VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 8:int, VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 9:int, VectorUDFHourTimestamp(col 1:timestamp, field HOUR_OF_DAY) -> 10:int, VectorUDFMinuteTimestamp(col 1:timestam [...]
- Statistics: Num rows: 52 Data size: 16436 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 52 Data size: 11916 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: bigint)
null sort order: z
@@ -283,7 +283,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 52 Data size: 16436 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 52 Data size: 11916 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: boolean), _col10 (type: timestamp), _col11 (type: timestamp), _col12 (type: timestamp), _col13 (type: timestamp), _col14 (type: timestamp), _col15 (type: timestamp), _col16 (type: timestamp)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -312,13 +312,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
- Statistics: Num rows: 52 Data size: 16436 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 52 Data size: 11916 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 52 Data size: 16436 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 52 Data size: 11916 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out b/ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out
index c55ef0f..8387880 100644
--- a/ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out
+++ b/ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out
@@ -140,14 +140,13 @@ POSTHOOK: Input: default@srcpart_merge_dp_rc_n1@ds=2008-04-08/hr=11
POSTHOOK: Input: default@srcpart_merge_dp_rc_n1@ds=2008-04-08/hr=12
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
- Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6
- Stage-5
- Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
- Stage-3 depends on stages: Stage-0
+ Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
Stage-4
- Stage-6
- Stage-7 depends on stages: Stage-6
+ Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
+ Stage-2 depends on stages: Stage-0
+ Stage-3
+ Stage-5
+ Stage-6 depends on stages: Stage-5
STAGE PLANS:
Stage: Stage-1
@@ -160,79 +159,56 @@ STAGE PLANS:
Select Operator
expressions: key (type: string), value (type: string), if(((UDFToDouble(key) % 2.0D) = 0.0D), 'a1', 'b1') (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 264000 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1000 Data size: 264000 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.merge_dynamic_part_n3
Select Operator
expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string)
outputColumnNames: key, value, ds, hr
- Statistics: Num rows: 1000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
keys: ds (type: string), hr (type: string)
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Stage: Stage-2
- Map Reduce
- Map Operator Tree:
- TableScan
- Reduce Output Operator
- key expressions: _col2 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col2 (type: string)
- Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: string), _col1 (type: string)
- Reduce Operator Tree:
- Select Operator
- expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string)
- outputColumnNames: _col0, _col1, _col2
- File Output Operator
- compressed: false
- Dp Sort State: PARTITION_SORTED
- Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
- name: default.merge_dynamic_part_n3
-
- Stage: Stage-8
+ Stage: Stage-7
Conditional Operator
- Stage: Stage-5
+ Stage: Stage-4
Move Operator
files:
hdfs directory: true
@@ -251,7 +227,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
name: default.merge_dynamic_part_n3
- Stage: Stage-3
+ Stage: Stage-2
Stats Work
Basic Stats Work:
Column Stats Desc:
@@ -259,21 +235,21 @@ STAGE PLANS:
Column Types: string, string
Table: default.merge_dynamic_part_n3
- Stage: Stage-4
+ Stage: Stage-3
Merge File Operator
Map Operator Tree:
RCFile Merge Operator
merge level: block
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
- Stage: Stage-6
+ Stage: Stage-5
Merge File Operator
Map Operator Tree:
RCFile Merge Operator
merge level: block
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
- Stage: Stage-7
+ Stage: Stage-6
Move Operator
files:
hdfs directory: true
diff --git a/ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out b/ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out
index 251c3f8..518f400 100644
--- a/ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out
+++ b/ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out
@@ -116,14 +116,13 @@ POSTHOOK: Input: default@srcpart_merge_dp_rc@ds=2008-04-08/hr=11
POSTHOOK: Input: default@srcpart_merge_dp_rc@ds=2008-04-08/hr=12
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
- Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6
- Stage-5
- Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
- Stage-3 depends on stages: Stage-0
+ Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
Stage-4
- Stage-6
- Stage-7 depends on stages: Stage-6
+ Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
+ Stage-2 depends on stages: Stage-0
+ Stage-3
+ Stage-5
+ Stage-6 depends on stages: Stage-5
STAGE PLANS:
Stage: Stage-1
@@ -136,79 +135,56 @@ STAGE PLANS:
Select Operator
expressions: key (type: string), value (type: string), if(((UDFToDouble(key) % 100.0D) = 0.0D), 'a1', 'b1') (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 618 Data size: 223716 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 618 Data size: 163152 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 618 Data size: 163152 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.merge_dynamic_part
Select Operator
expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string)
outputColumnNames: key, value, ds, hr
- Statistics: Num rows: 618 Data size: 281808 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 618 Data size: 221244 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
keys: ds (type: string), hr (type: string)
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 309 Data size: 357822 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 309 Data size: 357822 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 309 Data size: 357822 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 309 Data size: 357822 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 309 Data size: 357822 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Stage: Stage-2
- Map Reduce
- Map Operator Tree:
- TableScan
- Reduce Output Operator
- key expressions: _col2 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col2 (type: string)
- Statistics: Num rows: 618 Data size: 223716 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: string), _col1 (type: string)
- Reduce Operator Tree:
- Select Operator
- expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string)
- outputColumnNames: _col0, _col1, _col2
- File Output Operator
- compressed: false
- Dp Sort State: PARTITION_SORTED
- Statistics: Num rows: 618 Data size: 223716 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
- name: default.merge_dynamic_part
-
- Stage: Stage-8
+ Stage: Stage-7
Conditional Operator
- Stage: Stage-5
+ Stage: Stage-4
Move Operator
files:
hdfs directory: true
@@ -227,7 +203,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
name: default.merge_dynamic_part
- Stage: Stage-3
+ Stage: Stage-2
Stats Work
Basic Stats Work:
Column Stats Desc:
@@ -235,21 +211,21 @@ STAGE PLANS:
Column Types: string, string
Table: default.merge_dynamic_part
- Stage: Stage-4
+ Stage: Stage-3
Merge File Operator
Map Operator Tree:
RCFile Merge Operator
merge level: block
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
- Stage: Stage-6
+ Stage: Stage-5
Merge File Operator
Map Operator Tree:
RCFile Merge Operator
merge level: block
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
- Stage: Stage-7
+ Stage: Stage-6
Move Operator
files:
hdfs directory: true
diff --git a/ql/src/test/results/clientpositive/nullgroup4_multi_distinct.q.out b/ql/src/test/results/clientpositive/nullgroup4_multi_distinct.q.out
index 318e694..b4ff434 100644
--- a/ql/src/test/results/clientpositive/nullgroup4_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/nullgroup4_multi_distinct.q.out
@@ -33,12 +33,12 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 125 Data size: 37375 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 125 Data size: 25125 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
- Statistics: Num rows: 125 Data size: 37375 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 125 Data size: 25125 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Reduce Operator Tree:
Group By Operator
diff --git a/ql/src/test/results/clientpositive/offset_limit_global_optimizer.q.out b/ql/src/test/results/clientpositive/offset_limit_global_optimizer.q.out
index e29ca9d..7e1a2c0 100644
--- a/ql/src/test/results/clientpositive/offset_limit_global_optimizer.q.out
+++ b/ql/src/test/results/clientpositive/offset_limit_global_optimizer.q.out
@@ -36,12 +36,12 @@ STAGE PLANS:
Select Operator
expressions: key (type: string), substr(value, 5) (type: string), ds (type: string), hr (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
null sort order: zzzz
sort order: ++++
- Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE
tag: -1
TopN: 410
TopN Hash Memory Usage: 0.1
@@ -256,17 +256,17 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 10
Offset of rows: 400
- Statistics: Num rows: 10 Data size: 6390 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 5410 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 10 Data size: 6390 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 5410 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -354,12 +354,12 @@ STAGE PLANS:
Select Operator
expressions: key (type: string), substr(value, 5) (type: string), ds (type: string), hr (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
null sort order: zzzz
sort order: ++++
- Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE
tag: -1
TopN: 500
TopN Hash Memory Usage: 0.1
@@ -574,17 +574,17 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 10
Offset of rows: 490
- Statistics: Num rows: 10 Data size: 6390 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 5410 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 10 Data size: 6390 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 5410 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -672,12 +672,12 @@ STAGE PLANS:
Select Operator
expressions: key (type: string), substr(value, 5) (type: string), ds (type: string), hr (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
null sort order: zzzz
sort order: ++++
- Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE
tag: -1
TopN: 510
TopN Hash Memory Usage: 0.1
@@ -892,17 +892,17 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 20
Offset of rows: 490
- Statistics: Num rows: 20 Data size: 12780 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 20 Data size: 10820 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 20 Data size: 12780 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 20 Data size: 10820 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -1000,12 +1000,12 @@ STAGE PLANS:
Select Operator
expressions: key (type: string), substr(value, 5) (type: string), ds (type: string), hr (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
null sort order: zzzz
sort order: ++++
- Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE
tag: -1
TopN: 1090
TopN Hash Memory Usage: 0.1
@@ -1220,17 +1220,17 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 600
Offset of rows: 490
- Statistics: Num rows: 600 Data size: 383400 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 600 Data size: 324600 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 600 Data size: 383400 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 600 Data size: 324600 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -1903,12 +1903,12 @@ STAGE PLANS:
Select Operator
expressions: key (type: string), substr(value, 5) (type: string), ds (type: string), hr (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
null sort order: zzzz
sort order: ++++
- Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE
tag: -1
TopN: 410
TopN Hash Memory Usage: 0.1
@@ -2123,17 +2123,17 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 10
Offset of rows: 400
- Statistics: Num rows: 10 Data size: 6390 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 5410 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 10 Data size: 6390 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 5410 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -2216,12 +2216,12 @@ STAGE PLANS:
Select Operator
expressions: key (type: string), substr(value, 5) (type: string), ds (type: string), hr (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
null sort order: zzzz
sort order: ++++
- Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE
tag: -1
TopN: 500
TopN Hash Memory Usage: 0.1
@@ -2436,17 +2436,17 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 10
Offset of rows: 490
- Statistics: Num rows: 10 Data size: 6390 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 5410 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 10 Data size: 6390 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 5410 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -2529,12 +2529,12 @@ STAGE PLANS:
Select Operator
expressions: key (type: string), substr(value, 5) (type: string), ds (type: string), hr (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
null sort order: zzzz
sort order: ++++
- Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE
tag: -1
TopN: 510
TopN Hash Memory Usage: 0.1
@@ -2749,17 +2749,17 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 20
Offset of rows: 490
- Statistics: Num rows: 20 Data size: 12780 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 20 Data size: 10820 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 20 Data size: 12780 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 20 Data size: 10820 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -2852,12 +2852,12 @@ STAGE PLANS:
Select Operator
expressions: key (type: string), substr(value, 5) (type: string), ds (type: string), hr (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
null sort order: zzzz
sort order: ++++
- Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE
tag: -1
TopN: 1090
TopN Hash Memory Usage: 0.1
@@ -3072,17 +3072,17 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 600
Offset of rows: 490
- Statistics: Num rows: 600 Data size: 383400 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 600 Data size: 324600 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 600 Data size: 383400 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 600 Data size: 324600 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query19.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query19.q.out
index 94bf30c..4000b81 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/query19.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query19.q.out
@@ -96,23 +96,23 @@ Stage-0
PartitionCols:_col0, _col1, _col2, _col3
Group By Operator [GBY_34] (rows=76645658 width=314)
Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col8)"],keys:_col12, _col11, _col13, _col14
- Select Operator [SEL_33] (rows=76645658 width=650)
+ Select Operator [SEL_33] (rows=76645658 width=458)
Output:["_col8","_col11","_col12","_col13","_col14"]
- Filter Operator [FIL_32] (rows=76645658 width=650)
+ Filter Operator [FIL_32] (rows=76645658 width=458)
predicate:(_col3 <> _col16)
- Merge Join Operator [MERGEJOIN_122] (rows=76645658 width=650)
+ Merge Join Operator [MERGEJOIN_122] (rows=76645658 width=458)
Conds:RS_29._col7=RS_143._col0(Inner),Output:["_col3","_col8","_col11","_col12","_col13","_col14","_col16"]
<-Map 14 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_143]
PartitionCols:_col0
- Select Operator [SEL_142] (rows=1704 width=188)
+ Select Operator [SEL_142] (rows=1704 width=92)
Output:["_col0","_col1"]
TableScan [TS_21] (rows=1704 width=93)
default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_zip"]
<-Reducer 3 [SIMPLE_EDGE]
SHUFFLE [RS_29]
PartitionCols:_col7
- Merge Join Operator [MERGEJOIN_121] (rows=76645658 width=468)
+ Merge Join Operator [MERGEJOIN_121] (rows=76645658 width=372)
Conds:RS_26._col0=RS_27._col2(Inner),Output:["_col3","_col7","_col8","_col11","_col12","_col13","_col14"]
<-Reducer 10 [SIMPLE_EDGE]
SHUFFLE [RS_27]
@@ -165,7 +165,7 @@ Stage-0
<-Reducer 2 [SIMPLE_EDGE]
SHUFFLE [RS_26]
PartitionCols:_col0
- Merge Join Operator [MERGEJOIN_118] (rows=80000000 width=188)
+ Merge Join Operator [MERGEJOIN_118] (rows=80000000 width=92)
Conds:RS_125._col1=RS_127._col0(Inner),Output:["_col0","_col3"]
<-Map 1 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_125]
@@ -179,7 +179,7 @@ Stage-0
<-Map 7 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_127]
PartitionCols:_col0
- Select Operator [SEL_126] (rows=40000000 width=188)
+ Select Operator [SEL_126] (rows=40000000 width=92)
Output:["_col0","_col1"]
TableScan [TS_3] (rows=40000000 width=93)
default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_zip"]
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query79.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query79.q.out
index 2828fb0..2b94615 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/query79.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query79.q.out
@@ -72,17 +72,17 @@ Stage-0
Stage-1
Reducer 3 vectorized
File Output Operator [FS_125]
- Limit [LIM_124] (rows=100 width=776)
+ Limit [LIM_124] (rows=100 width=592)
Number of rows:100
- Select Operator [SEL_123] (rows=479121995 width=776)
+ Select Operator [SEL_123] (rows=479121995 width=592)
Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
<-Reducer 2 [SIMPLE_EDGE]
SHUFFLE [RS_32]
- Select Operator [SEL_31] (rows=479121995 width=776)
+ Select Operator [SEL_31] (rows=479121995 width=592)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
- Top N Key Operator [TNK_56] (rows=479121995 width=685)
+ Top N Key Operator [TNK_56] (rows=479121995 width=593)
keys:_col2, _col1, substr(_col5, 1, 30), _col7,top n:100
- Merge Join Operator [MERGEJOIN_100] (rows=479121995 width=685)
+ Merge Join Operator [MERGEJOIN_100] (rows=479121995 width=593)
Conds:RS_102._col0=RS_122._col1(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7","_col8"]
<-Map 1 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_102]
@@ -94,7 +94,7 @@ Stage-0
<-Reducer 8 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_122]
PartitionCols:_col1
- Select Operator [SEL_121] (rows=479121995 width=508)
+ Select Operator [SEL_121] (rows=479121995 width=416)
Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
Group By Operator [GBY_120] (rows=479121995 width=328)
Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out
index ed1501b..06b9f6f 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out
@@ -271,7 +271,7 @@ Stage-0
<-Map 18 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_149]
PartitionCols:_col2
- Select Operator [SEL_148] (rows=1704 width=276)
+ Select Operator [SEL_148] (rows=1704 width=178)
Output:["_col0","_col1","_col2"]
Filter Operator [FIL_147] (rows=1704 width=181)
predicate:substr(s_zip, 1, 2) is not null
@@ -280,24 +280,24 @@ Stage-0
<-Reducer 11 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_146]
PartitionCols:_col0
- Select Operator [SEL_145] (rows=1 width=184)
+ Select Operator [SEL_145] (rows=1 width=86)
Output:["_col0"]
- Filter Operator [FIL_144] (rows=1 width=192)
+ Filter Operator [FIL_144] (rows=1 width=96)
predicate:(_col1 = 2L)
- Group By Operator [GBY_143] (rows=5633 width=192)
+ Group By Operator [GBY_143] (rows=5633 width=96)
Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0
<-Union 10 [SIMPLE_EDGE]
<-Reducer 16 [CONTAINS] vectorized
Reduce Output Operator [RS_175]
PartitionCols:_col0
- Group By Operator [GBY_174] (rows=5633 width=192)
+ Group By Operator [GBY_174] (rows=5633 width=96)
Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0
- Group By Operator [GBY_173] (rows=1126 width=192)
+ Group By Operator [GBY_173] (rows=1126 width=96)
Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0
<-Reducer 15 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_172]
PartitionCols:_col0
- Group By Operator [GBY_171] (rows=1126 width=192)
+ Group By Operator [GBY_171] (rows=1126 width=96)
Output:["_col0","_col1"],aggregations:["count()"],keys:_col0
Select Operator [SEL_170] (rows=2253 width=97)
Output:["_col0"]
@@ -333,14 +333,14 @@ Stage-0
<-Reducer 9 [CONTAINS] vectorized
Reduce Output Operator [RS_161]
PartitionCols:_col0
- Group By Operator [GBY_160] (rows=5633 width=192)
+ Group By Operator [GBY_160] (rows=5633 width=96)
Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0
- Group By Operator [GBY_159] (rows=10141 width=192)
+ Group By Operator [GBY_159] (rows=10141 width=96)
Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0
<-Map 8 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_158]
PartitionCols:_col0
- Group By Operator [GBY_157] (rows=141974 width=192)
+ Group By Operator [GBY_157] (rows=141974 width=96)
Output:["_col0","_col1"],aggregations:["count()"],keys:_col0
Select Operator [SEL_156] (rows=20000000 width=89)
Output:["_col0"]
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query85.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query85.q.out
index 706bbd7..9a550b7 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/query85.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query85.q.out
@@ -200,13 +200,13 @@ Stage-0
Stage-1
Reducer 6 vectorized
File Output Operator [FS_209]
- Limit [LIM_208] (rows=72 width=832)
+ Limit [LIM_208] (rows=72 width=656)
Number of rows:100
- Select Operator [SEL_207] (rows=72 width=832)
+ Select Operator [SEL_207] (rows=72 width=656)
Output:["_col0","_col1","_col2","_col3"]
<-Reducer 5 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_206]
- Select Operator [SEL_205] (rows=72 width=832)
+ Select Operator [SEL_205] (rows=72 width=656)
Output:["_col4","_col5","_col6","_col7"]
Top N Key Operator [TNK_204] (rows=72 width=353)
keys:substr(_col0, 1, 20), (UDFToDouble(_col1) / _col2), (_col3 / _col4), (_col5 / _col6),top n:100
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query99.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query99.q.out
index 5310297..98249ac 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/query99.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query99.q.out
@@ -96,24 +96,24 @@ Stage-0
Stage-1
Reducer 7 vectorized
File Output Operator [FS_125]
- Limit [LIM_124] (rows=100 width=590)
+ Limit [LIM_124] (rows=100 width=420)
Number of rows:100
- Select Operator [SEL_123] (rows=3920468 width=590)
+ Select Operator [SEL_123] (rows=3920468 width=420)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"]
<-Reducer 6 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_122]
- Select Operator [SEL_121] (rows=3920468 width=590)
+ Select Operator [SEL_121] (rows=3920468 width=420)
Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"]
- Group By Operator [GBY_120] (rows=3920468 width=406)
+ Group By Operator [GBY_120] (rows=3920468 width=321)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)"],keys:KEY._col0, KEY._col1, KEY._col2
<-Reducer 5 [SIMPLE_EDGE]
SHUFFLE [RS_26]
PartitionCols:_col0, _col1, _col2
- Group By Operator [GBY_25] (rows=7840936 width=406)
+ Group By Operator [GBY_25] (rows=7840936 width=321)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col13, _col11, _col15
- Top N Key Operator [TNK_56] (rows=15681873 width=386)
+ Top N Key Operator [TNK_56] (rows=15681873 width=301)
keys:_col13, _col11, _col15,top n:100
- Merge Join Operator [MERGEJOIN_102] (rows=15681873 width=386)
+ Merge Join Operator [MERGEJOIN_102] (rows=15681873 width=301)
Conds:RS_21._col1=RS_119._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col8","_col11","_col13","_col15"]
<-Map 12 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_119]
@@ -125,12 +125,12 @@ Stage-0
<-Reducer 4 [SIMPLE_EDGE]
SHUFFLE [RS_21]
PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_101] (rows=15681873 width=291)
+ Merge Join Operator [MERGEJOIN_101] (rows=15681873 width=206)
Conds:RS_18._col3=RS_117._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col8","_col11","_col13"]
<-Map 11 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_117]
PartitionCols:_col0
- Select Operator [SEL_116] (rows=27 width=188)
+ Select Operator [SEL_116] (rows=27 width=103)
Output:["_col0","_col1"]
TableScan [TS_8] (rows=27 width=104)
default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk","w_warehouse_name"]
diff --git a/ql/src/test/results/clientpositive/perf/tez/query19.q.out b/ql/src/test/results/clientpositive/perf/tez/query19.q.out
index 55ce944..d94c899 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query19.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query19.q.out
@@ -96,16 +96,16 @@ Stage-0
PartitionCols:_col0, _col1, _col2, _col3
Group By Operator [GBY_36] (rows=76645658 width=314)
Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col8)"],keys:_col12, _col11, _col13, _col14
- Select Operator [SEL_35] (rows=76645658 width=650)
+ Select Operator [SEL_35] (rows=76645658 width=458)
Output:["_col8","_col11","_col12","_col13","_col14"]
- Filter Operator [FIL_34] (rows=76645658 width=650)
+ Filter Operator [FIL_34] (rows=76645658 width=458)
predicate:(_col3 <> _col16)
- Merge Join Operator [MERGEJOIN_124] (rows=76645658 width=650)
+ Merge Join Operator [MERGEJOIN_124] (rows=76645658 width=458)
Conds:RS_31._col7=RS_147._col0(Inner),Output:["_col3","_col8","_col11","_col12","_col13","_col14","_col16"]
<-Map 14 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_147]
PartitionCols:_col0
- Select Operator [SEL_146] (rows=1704 width=188)
+ Select Operator [SEL_146] (rows=1704 width=92)
Output:["_col0","_col1"]
Filter Operator [FIL_145] (rows=1704 width=93)
predicate:s_store_sk is not null
@@ -114,7 +114,7 @@ Stage-0
<-Reducer 3 [SIMPLE_EDGE]
SHUFFLE [RS_31]
PartitionCols:_col7
- Merge Join Operator [MERGEJOIN_123] (rows=76645658 width=468)
+ Merge Join Operator [MERGEJOIN_123] (rows=76645658 width=372)
Conds:RS_28._col0=RS_29._col2(Inner),Output:["_col3","_col7","_col8","_col11","_col12","_col13","_col14"]
<-Reducer 10 [SIMPLE_EDGE]
SHUFFLE [RS_29]
@@ -167,7 +167,7 @@ Stage-0
<-Reducer 2 [SIMPLE_EDGE]
SHUFFLE [RS_28]
PartitionCols:_col0
- Merge Join Operator [MERGEJOIN_120] (rows=80000000 width=188)
+ Merge Join Operator [MERGEJOIN_120] (rows=80000000 width=92)
Conds:RS_127._col1=RS_130._col0(Inner),Output:["_col0","_col3"]
<-Map 1 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_127]
@@ -181,7 +181,7 @@ Stage-0
<-Map 7 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_130]
PartitionCols:_col0
- Select Operator [SEL_129] (rows=40000000 width=188)
+ Select Operator [SEL_129] (rows=40000000 width=92)
Output:["_col0","_col1"]
Filter Operator [FIL_128] (rows=40000000 width=93)
predicate:ca_address_sk is not null
diff --git a/ql/src/test/results/clientpositive/perf/tez/query23.q.out b/ql/src/test/results/clientpositive/perf/tez/query23.q.out
index e60051b..edde953 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query23.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query23.q.out
@@ -173,25 +173,25 @@ Stage-0
PartitionCols:_col0
Group By Operator [GBY_501] (rows=62562 width=4)
Output:["_col0"],keys:_col1
- Select Operator [SEL_500] (rows=183358851 width=290)
+ Select Operator [SEL_500] (rows=183358851 width=220)
Output:["_col1"]
- Filter Operator [FIL_499] (rows=183358851 width=290)
+ Filter Operator [FIL_499] (rows=183358851 width=220)
predicate:(_col3 > 4L)
- Select Operator [SEL_498] (rows=550076554 width=290)
+ Select Operator [SEL_498] (rows=550076554 width=220)
Output:["_col1","_col3"]
- Group By Operator [GBY_497] (rows=550076554 width=290)
+ Group By Operator [GBY_497] (rows=550076554 width=220)
Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2
<-Reducer 15 [SIMPLE_EDGE]
SHUFFLE [RS_23]
PartitionCols:_col0
- Group By Operator [GBY_22] (rows=550076554 width=290)
+ Group By Operator [GBY_22] (rows=550076554 width=220)
Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col4, _col3, _col5
- Merge Join Operator [MERGEJOIN_442] (rows=550076554 width=282)
+ Merge Join Operator [MERGEJOIN_442] (rows=550076554 width=212)
Conds:RS_18._col1=RS_496._col0(Inner),Output:["_col3","_col4","_col5"]
<-Map 19 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_496]
PartitionCols:_col0
- Select Operator [SEL_495] (rows=462000 width=188)
+ Select Operator [SEL_495] (rows=462000 width=118)
Output:["_col0","_col1"]
Filter Operator [FIL_494] (rows=462000 width=188)
predicate:i_item_sk is not null
diff --git a/ql/src/test/results/clientpositive/perf/tez/query79.q.out b/ql/src/test/results/clientpositive/perf/tez/query79.q.out
index f7c8e2f..83b29ee 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query79.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query79.q.out
@@ -72,17 +72,17 @@ Stage-0
Stage-1
Reducer 3 vectorized
File Output Operator [FS_127]
- Limit [LIM_126] (rows=100 width=776)
+ Limit [LIM_126] (rows=100 width=592)
Number of rows:100
- Select Operator [SEL_125] (rows=479121995 width=776)
+ Select Operator [SEL_125] (rows=479121995 width=592)
Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
<-Reducer 2 [SIMPLE_EDGE]
SHUFFLE [RS_33]
- Select Operator [SEL_32] (rows=479121995 width=776)
+ Select Operator [SEL_32] (rows=479121995 width=592)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
- Top N Key Operator [TNK_57] (rows=479121995 width=685)
+ Top N Key Operator [TNK_57] (rows=479121995 width=593)
keys:_col2, _col1, substr(_col5, 1, 30), _col7,top n:100
- Merge Join Operator [MERGEJOIN_101] (rows=479121995 width=685)
+ Merge Join Operator [MERGEJOIN_101] (rows=479121995 width=593)
Conds:RS_104._col0=RS_124._col1(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7","_col8"]
<-Map 1 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_104]
@@ -96,7 +96,7 @@ Stage-0
<-Reducer 8 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_124]
PartitionCols:_col1
- Select Operator [SEL_123] (rows=479121995 width=508)
+ Select Operator [SEL_123] (rows=479121995 width=416)
Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
Group By Operator [GBY_122] (rows=479121995 width=328)
Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3
diff --git a/ql/src/test/results/clientpositive/perf/tez/query8.q.out b/ql/src/test/results/clientpositive/perf/tez/query8.q.out
index c72498b..27420bd 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query8.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query8.q.out
@@ -271,7 +271,7 @@ Stage-0
<-Map 18 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_149]
PartitionCols:_col2
- Select Operator [SEL_148] (rows=1704 width=276)
+ Select Operator [SEL_148] (rows=1704 width=178)
Output:["_col0","_col1","_col2"]
Filter Operator [FIL_147] (rows=1704 width=181)
predicate:(s_store_sk is not null and substr(s_zip, 1, 2) is not null)
@@ -280,24 +280,24 @@ Stage-0
<-Reducer 11 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_146]
PartitionCols:_col0
- Select Operator [SEL_145] (rows=1 width=184)
+ Select Operator [SEL_145] (rows=1 width=86)
Output:["_col0"]
- Filter Operator [FIL_144] (rows=1 width=192)
+ Filter Operator [FIL_144] (rows=1 width=96)
predicate:(_col1 = 2L)
- Group By Operator [GBY_143] (rows=5633 width=192)
+ Group By Operator [GBY_143] (rows=5633 width=96)
Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0
<-Union 10 [SIMPLE_EDGE]
<-Reducer 16 [CONTAINS] vectorized
Reduce Output Operator [RS_175]
PartitionCols:_col0
- Group By Operator [GBY_174] (rows=5633 width=192)
+ Group By Operator [GBY_174] (rows=5633 width=96)
Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0
- Group By Operator [GBY_173] (rows=1126 width=192)
+ Group By Operator [GBY_173] (rows=1126 width=96)
Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0
<-Reducer 15 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_172]
PartitionCols:_col0
- Group By Operator [GBY_171] (rows=1126 width=192)
+ Group By Operator [GBY_171] (rows=1126 width=96)
Output:["_col0","_col1"],aggregations:["count()"],keys:_col0
Select Operator [SEL_170] (rows=2253 width=97)
Output:["_col0"]
@@ -333,14 +333,14 @@ Stage-0
<-Reducer 9 [CONTAINS] vectorized
Reduce Output Operator [RS_161]
PartitionCols:_col0
- Group By Operator [GBY_160] (rows=5633 width=192)
+ Group By Operator [GBY_160] (rows=5633 width=96)
Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0
- Group By Operator [GBY_159] (rows=10141 width=192)
+ Group By Operator [GBY_159] (rows=10141 width=96)
Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0
<-Map 8 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_158]
PartitionCols:_col0
- Group By Operator [GBY_157] (rows=141974 width=192)
+ Group By Operator [GBY_157] (rows=141974 width=96)
Output:["_col0","_col1"],aggregations:["count()"],keys:_col0
Select Operator [SEL_156] (rows=20000000 width=89)
Output:["_col0"]
diff --git a/ql/src/test/results/clientpositive/perf/tez/query85.q.out b/ql/src/test/results/clientpositive/perf/tez/query85.q.out
index 6e1a562..94ec2f9 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query85.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query85.q.out
@@ -201,13 +201,13 @@ Stage-0
Stage-1
Reducer 6 vectorized
File Output Operator [FS_239]
- Limit [LIM_238] (rows=72 width=832)
+ Limit [LIM_238] (rows=72 width=656)
Number of rows:100
- Select Operator [SEL_237] (rows=72 width=832)
+ Select Operator [SEL_237] (rows=72 width=656)
Output:["_col0","_col1","_col2","_col3"]
<-Reducer 5 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_236]
- Select Operator [SEL_235] (rows=72 width=832)
+ Select Operator [SEL_235] (rows=72 width=656)
Output:["_col4","_col5","_col6","_col7"]
Top N Key Operator [TNK_234] (rows=72 width=353)
keys:substr(_col0, 1, 20), (UDFToDouble(_col1) / _col2), (_col3 / _col4), (_col5 / _col6),top n:100
diff --git a/ql/src/test/results/clientpositive/perf/tez/query99.q.out b/ql/src/test/results/clientpositive/perf/tez/query99.q.out
index d24d5cc..87e2713 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query99.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query99.q.out
@@ -96,24 +96,24 @@ Stage-0
Stage-1
Reducer 7 vectorized
File Output Operator [FS_131]
- Limit [LIM_130] (rows=100 width=590)
+ Limit [LIM_130] (rows=100 width=420)
Number of rows:100
- Select Operator [SEL_129] (rows=3920468 width=590)
+ Select Operator [SEL_129] (rows=3920468 width=420)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"]
<-Reducer 6 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_128]
- Select Operator [SEL_127] (rows=3920468 width=590)
+ Select Operator [SEL_127] (rows=3920468 width=420)
Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"]
- Group By Operator [GBY_126] (rows=3920468 width=406)
+ Group By Operator [GBY_126] (rows=3920468 width=321)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)"],keys:KEY._col0, KEY._col1, KEY._col2
<-Reducer 5 [SIMPLE_EDGE]
SHUFFLE [RS_29]
PartitionCols:_col0, _col1, _col2
- Group By Operator [GBY_28] (rows=7840936 width=406)
+ Group By Operator [GBY_28] (rows=7840936 width=321)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col13, _col15, _col11
- Top N Key Operator [TNK_59] (rows=15681873 width=386)
+ Top N Key Operator [TNK_59] (rows=15681873 width=301)
keys:_col13, _col15, _col11,top n:100
- Merge Join Operator [MERGEJOIN_105] (rows=15681873 width=386)
+ Merge Join Operator [MERGEJOIN_105] (rows=15681873 width=301)
Conds:RS_24._col2=RS_108._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col8","_col11","_col13","_col15"]
<-Map 11 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_108]
@@ -127,12 +127,12 @@ Stage-0
<-Reducer 4 [SIMPLE_EDGE]
SHUFFLE [RS_24]
PartitionCols:_col2
- Merge Join Operator [MERGEJOIN_104] (rows=282273729 width=305)
+ Merge Join Operator [MERGEJOIN_104] (rows=282273729 width=220)
Conds:RS_21._col3=RS_125._col0(Inner),Output:["_col2","_col4","_col5","_col6","_col7","_col8","_col11","_col13"]
<-Map 10 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_125]
PartitionCols:_col0
- Select Operator [SEL_124] (rows=27 width=188)
+ Select Operator [SEL_124] (rows=27 width=103)
Output:["_col0","_col1"]
Filter Operator [FIL_123] (rows=27 width=104)
predicate:w_warehouse_sk is not null
diff --git a/ql/src/test/results/clientpositive/spark/union17.q.out b/ql/src/test/results/clientpositive/spark/union17.q.out
index c645207..91939f2 100644
--- a/ql/src/test/results/clientpositive/spark/union17.q.out
+++ b/ql/src/test/results/clientpositive/spark/union17.q.out
@@ -85,13 +85,13 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 1 Data size: 275 Basic stats: COMPLETE Column stats: PARTIAL
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 1 Data size: 275 Basic stats: COMPLETE Column stats: PARTIAL
Map 7
Map Operator Tree:
TableScan
@@ -107,13 +107,13 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 1 Data size: 459 Basic stats: COMPLETE Column stats: PARTIAL
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
null sort order: zzz
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 1 Data size: 459 Basic stats: COMPLETE Column stats: PARTIAL
Reducer 3
Reduce Operator Tree:
Group By Operator
@@ -171,13 +171,13 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 1 Data size: 275 Basic stats: COMPLETE Column stats: PARTIAL
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 1 Data size: 275 Basic stats: COMPLETE Column stats: PARTIAL
Reducer 9
Reduce Operator Tree:
Group By Operator
@@ -195,13 +195,13 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 1 Data size: 459 Basic stats: COMPLETE Column stats: PARTIAL
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
null sort order: zzz
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 1 Data size: 459 Basic stats: COMPLETE Column stats: PARTIAL
Stage: Stage-0
Move Operator
diff --git a/ql/src/test/results/clientpositive/union17.q.out b/ql/src/test/results/clientpositive/union17.q.out
index 480befa..45b0862 100644
--- a/ql/src/test/results/clientpositive/union17.q.out
+++ b/ql/src/test/results/clientpositive/union17.q.out
@@ -95,20 +95,20 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 70000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 68750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 70000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 68750 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(DISTINCT substr(_col1, 5))
keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string)
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 501 Data size: 232464 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 501 Data size: 229959 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
table:
@@ -130,20 +130,20 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 70000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 68750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 70000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 68750 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(DISTINCT substr(_col1, 5))
keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string)
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 501 Data size: 232464 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 501 Data size: 229959 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
table:
@@ -245,7 +245,7 @@ STAGE PLANS:
null sort order: zzz
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 501 Data size: 232464 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 501 Data size: 229959 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized
Reduce Operator Tree:
Group By Operator
diff --git a/ql/src/test/results/clientpositive/vector_case_when_1.q.out b/ql/src/test/results/clientpositive/vector_case_when_1.q.out
index bedde47..e7117e3 100644
--- a/ql/src/test/results/clientpositive/vector_case_when_1.q.out
+++ b/ql/src/test/results/clientpositive/vector_case_when_1.q.out
@@ -213,13 +213,13 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [4, 21, 26, 30, 34, 38, 42, 44, 46, 48, 50, 52, 54, 58, 61, 64, 67]
selectExpressions: VectorUDFAdaptor(CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE ('Huge number') END)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, LongColLessLongScalar(col 4:int, val 100) -> 20:boolean) -> 21:string, Ve [...]
- Statistics: Num rows: 101 Data size: 141804 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 101 Data size: 118604 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 101 Data size: 141804 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 101 Data size: 118604 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -546,13 +546,13 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [4, 24, 33, 40, 44, 48, 52, 54, 56, 58, 60, 62, 64, 68, 71, 74, 77]
selectExpressions: IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 23:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 22:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 21:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprStringScalarStringScalar( [...]
- Statistics: Num rows: 101 Data size: 141804 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 101 Data size: 118604 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 101 Data size: 141804 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 101 Data size: 118604 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -879,13 +879,13 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [4, 27, 39, 48, 52, 57, 62, 64, 66, 71, 76, 78, 80, 84, 87, 90, 93]
selectExpressions: IfExprColumnCondExpr(col 17:boolean, col 18:stringcol 26:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, ConstantVectorExpression(val Single) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 25:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 19:boolean, ConstantVectorExpression(val Two) -> 20:string, IfExprColumnCondExpr(col 21:boolean, col 22:stringcol 24:string)(children: LongColLessLongScal [...]
- Statistics: Num rows: 101 Data size: 141804 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 101 Data size: 118604 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 101 Data size: 141804 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 101 Data size: 118604 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/vector_case_when_2.q.out b/ql/src/test/results/clientpositive/vector_case_when_2.q.out
index 288e6f0..a7b46fd 100644
--- a/ql/src/test/results/clientpositive/vector_case_when_2.q.out
+++ b/ql/src/test/results/clientpositive/vector_case_when_2.q.out
@@ -145,7 +145,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [1, 3, 9, 14, 18, 22, 24, 27, 32, 38, 2]
selectExpressions: VectorUDFAdaptor(CASE WHEN ((ctimestamp2 <= TIMESTAMP'1800-12-31 00:00:00')) THEN ('1800s or Earlier') WHEN ((ctimestamp2 < TIMESTAMP'1900-01-01 00:00:00')) THEN ('1900s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN ('Early 2010s') ELSE ('Unknown') END)(children: TimestampColLessEqualTimestampScalar(col 3 [...]
- Statistics: Num rows: 51 Data size: 50745 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 51 Data size: 50345 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: timestamp), _col10 (type: string), _col1 (type: timestamp)
null sort order: zzz
@@ -155,7 +155,7 @@ STAGE PLANS:
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 51 Data size: 50745 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 51 Data size: 50345 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: date)
Execution mode: vectorized
Map Vectorization:
@@ -181,10 +181,10 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey2 (type: timestamp), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: date)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
- Statistics: Num rows: 51 Data size: 45084 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 51 Data size: 44684 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 51 Data size: 45084 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 51 Data size: 44684 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -409,7 +409,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [1, 3, 12, 21, 28, 32, 34, 37, 42, 48, 2]
selectExpressions: IfExprStringScalarStringGroupColumn(col 5:boolean, val 1800s or Earliercol 11:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val 1900scol 10:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 9:string)(children: Time [...]
- Statistics: Num rows: 51 Data size: 50745 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 51 Data size: 50345 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: timestamp), _col10 (type: string), _col1 (type: timestamp)
null sort order: zzz
@@ -419,7 +419,7 @@ STAGE PLANS:
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 51 Data size: 50745 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 51 Data size: 50345 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: date)
Execution mode: vectorized
Map Vectorization:
@@ -445,10 +445,10 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey2 (type: timestamp), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: date)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
- Statistics: Num rows: 51 Data size: 45084 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 51 Data size: 44684 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 51 Data size: 45084 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 51 Data size: 44684 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -673,7 +673,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [1, 3, 15, 27, 36, 40, 42, 45, 50, 56, 2]
selectExpressions: IfExprColumnCondExpr(col 5:boolean, col 6:stringcol 14:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00) -> 5:boolean, ConstantVectorExpression(val 1800s or Earlier) -> 6:string, IfExprColumnCondExpr(col 7:boolean, col 8:stringcol 13:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00) -> 7:boolean, ConstantVectorExpression(val 1900s) -> 8:string, IfExprColumnCondExp [...]
- Statistics: Num rows: 51 Data size: 50745 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 51 Data size: 50345 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: timestamp), _col10 (type: string), _col1 (type: timestamp)
null sort order: zzz
@@ -683,7 +683,7 @@ STAGE PLANS:
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 51 Data size: 50745 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 51 Data size: 50345 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: date)
Execution mode: vectorized
Map Vectorization:
@@ -709,10 +709,10 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey2 (type: timestamp), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: date)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
- Statistics: Num rows: 51 Data size: 45084 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 51 Data size: 44684 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 51 Data size: 45084 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 51 Data size: 44684 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/vector_groupby4.q.out b/ql/src/test/results/clientpositive/vector_groupby4.q.out
index 925b96c..8ea182a 100644
--- a/ql/src/test/results/clientpositive/vector_groupby4.q.out
+++ b/ql/src/test/results/clientpositive/vector_groupby4.q.out
@@ -90,7 +90,7 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: partial1
outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
table:
@@ -114,7 +114,7 @@ STAGE PLANS:
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -134,10 +134,10 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: final
outputColumnNames: _col0
- Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -146,7 +146,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string)
outputColumnNames: c1
- Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
table:
@@ -187,7 +187,7 @@ STAGE PLANS:
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: c1 (type: string)
Execution mode: vectorized
Map Vectorization:
diff --git a/ql/src/test/results/clientpositive/vector_groupby6.q.out b/ql/src/test/results/clientpositive/vector_groupby6.q.out
index b478656..2cba267 100644
--- a/ql/src/test/results/clientpositive/vector_groupby6.q.out
+++ b/ql/src/test/results/clientpositive/vector_groupby6.q.out
@@ -90,7 +90,7 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: partial1
outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
table:
@@ -114,7 +114,7 @@ STAGE PLANS:
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -134,10 +134,10 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: final
outputColumnNames: _col0
- Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -146,7 +146,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string)
outputColumnNames: c1
- Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
table:
@@ -187,7 +187,7 @@ STAGE PLANS:
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: c1 (type: string)
Execution mode: vectorized
Map Vectorization:
diff --git a/ql/src/test/results/clientpositive/vector_if_expr.q.out b/ql/src/test/results/clientpositive/vector_if_expr.q.out
index 58c2e1e..292e8c6 100644
--- a/ql/src/test/results/clientpositive/vector_if_expr.q.out
+++ b/ql/src/test/results/clientpositive/vector_if_expr.q.out
@@ -41,7 +41,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [10, 13]
selectExpressions: IfExprStringScalarStringScalar(col 10:boolean, val first, val second) -> 13:string
- Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 4587 Data size: 426534 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: boolean)
null sort order: z
@@ -51,7 +51,7 @@ STAGE PLANS:
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 4587 Data size: 426534 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: vectorized
Map Vectorization:
@@ -71,10 +71,10 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 4587 Data size: 426534 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 4587 Data size: 426534 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/vector_nvl.q.out b/ql/src/test/results/clientpositive/vector_nvl.q.out
index 43ca0ec..26bae3f 100644
--- a/ql/src/test/results/clientpositive/vector_nvl.q.out
+++ b/ql/src/test/results/clientpositive/vector_nvl.q.out
@@ -140,19 +140,19 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [4, 14]
selectExpressions: VectorCoalesce(columns [4, 13])(children: col 4:float, ConstantVectorExpression(val 1.0) -> 13:float) -> 14:float
- Statistics: Num rows: 12288 Data size: 85848 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 12288 Data size: 73392 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 10
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 10 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 10 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/vectorization_multi_value.q.out b/ql/src/test/results/clientpositive/vectorization_multi_value.q.out
index 2fec50d..b4507fb 100644
--- a/ql/src/test/results/clientpositive/vectorization_multi_value.q.out
+++ b/ql/src/test/results/clientpositive/vectorization_multi_value.q.out
@@ -64,13 +64,13 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [4]
selectExpressions: IfExprCondExprNull(col 2:boolean, col 3:map<string,string>, null)(children: ConstantVectorExpression(val 1) -> 2:boolean, VectorUDFAdaptor(map('a':'b')) -> 3:map<string,string>) -> 4:map<string,string>
- Statistics: Num rows: 3 Data size: 978 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 326 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 3 Data size: 978 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 326 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -143,13 +143,13 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [5]
selectExpressions: IfExprCondExprNull(col 2:boolean, col 4:map<string,map<string,string>>, null)(children: ConstantVectorExpression(val 1) -> 2:boolean, VectorUDFAdaptor(map('a':map('b':'c')))(children: VectorUDFAdaptor(map('b':'c')) -> 3:map<string,string>) -> 4:map<string,map<string,string>>) -> 5:map<string,map<string,string>>
- Statistics: Num rows: 3 Data size: 723 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 241 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 3 Data size: 723 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 241 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -222,13 +222,13 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [4]
selectExpressions: IfExprCondExprNull(col 2:boolean, col 3:map<string,string>, null)(children: ConstantVectorExpression(val 1) -> 2:boolean, VectorUDFAdaptor(map('a':a)) -> 3:map<string,string>) -> 4:map<string,string>
- Statistics: Num rows: 3 Data size: 2760 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 920 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 3 Data size: 2760 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 920 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -301,13 +301,13 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [5]
selectExpressions: IfExprCondExprNull(col 2:boolean, col 4:map<string,map<string,string>>, null)(children: ConstantVectorExpression(val 1) -> 2:boolean, VectorUDFAdaptor(map('a':map('b':a)))(children: VectorUDFAdaptor(map('b':a)) -> 3:map<string,string>) -> 4:map<string,map<string,string>>) -> 5:map<string,map<string,string>>
- Statistics: Num rows: 3 Data size: 4968 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 1656 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 3 Data size: 4968 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 1656 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -380,13 +380,13 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [4]
selectExpressions: IfExprCondExprNull(col 2:boolean, col 3:array<string>, null)(children: ConstantVectorExpression(val 1) -> 2:boolean, VectorUDFAdaptor(array('a','b')) -> 3:array<string>) -> 4:array<string>
- Statistics: Num rows: 3 Data size: 1224 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 3 Data size: 1224 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -459,13 +459,13 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [6]
selectExpressions: IfExprCondExprNull(col 2:boolean, col 5:array<array<string>>, null)(children: ConstantVectorExpression(val 1) -> 2:boolean, VectorUDFAdaptor(array(array('a','b'),array('c','d')))(children: VectorUDFAdaptor(array('a','b')) -> 3:array<string>, VectorUDFAdaptor(array('c','d')) -> 4:array<string>) -> 5:array<array<string>>) -> 6:array<array<string>>
- Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -538,13 +538,13 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [4]
selectExpressions: IfExprCondExprNull(col 2:boolean, col 3:array<string>, null)(children: ConstantVectorExpression(val 1) -> 2:boolean, VectorUDFAdaptor(array('a',a)) -> 3:array<string>) -> 4:array<string>
- Statistics: Num rows: 3 Data size: 5760 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 3 Data size: 5760 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -617,13 +617,13 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [6]
selectExpressions: IfExprCondExprNull(col 2:boolean, col 5:array<array<string>>, null)(children: ConstantVectorExpression(val 1) -> 2:boolean, VectorUDFAdaptor(array(array('a',a),array('b','c')))(children: VectorUDFAdaptor(array('a',a)) -> 3:array<string>, VectorUDFAdaptor(array('b','c')) -> 4:array<string>) -> 5:array<array<string>>) -> 6:array<array<string>>
- Statistics: Num rows: 3 Data size: 57840 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 19280 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 3 Data size: 57840 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 19280 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/vectorized_string_funcs.q.out b/ql/src/test/results/clientpositive/vectorized_string_funcs.q.out
index 75da191..2c0504a 100644
--- a/ql/src/test/results/clientpositive/vectorized_string_funcs.q.out
+++ b/ql/src/test/results/clientpositive/vectorized_string_funcs.q.out
@@ -68,10 +68,10 @@ STAGE PLANS:
Select Operator
expressions: substr(cstring1, 1, 2) (type: string), substr(cstring1, 2) (type: string), lower(cstring1) (type: string), upper(cstring1) (type: string), upper(cstring1) (type: string), length(cstring1) (type: int), trim(cstring1) (type: string), ltrim(cstring1) (type: string), rtrim(cstring1) (type: string), concat(cstring1, cstring2) (type: string), concat('>', cstring1) (type: string), concat(cstring1, '<') (type: string), concat(substr(cstring1, 1, 2), substr(cstring2, [...]
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1024 Data size: 2265088 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1024 Data size: 2024426 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1024 Data size: 2265088 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1024 Data size: 2024426 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out b/ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
index 907edb6..c94eb90 100644
--- a/ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
+++ b/ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
@@ -268,7 +268,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 0, 1, 3, 13, 14, 15, 16, 17]
selectExpressions: VectorUDFUnixTimeStampTimestamp(col 1:timestamp) -> 5:bigint, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 6:int, VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 7:int, VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 8:int, VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 9:int, VectorUDFHourTimestamp(col 1:timestamp, field HOUR_OF_DAY) -> 10:int, VectorUDFMinuteTimestamp(col 1:timestamp, fie [...]
- Statistics: Num rows: 52 Data size: 16436 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 52 Data size: 11916 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: bigint)
null sort order: z
@@ -278,7 +278,7 @@ STAGE PLANS:
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 52 Data size: 16436 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 52 Data size: 11916 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: boolean), _col10 (type: timestamp), _col11 (type: timestamp), _col12 (type: timestamp), _col13 (type: timestamp), _col14 (type: timestamp), _col15 (type: timestamp), _col16 (type: timestamp)
Execution mode: vectorized
Map Vectorization:
@@ -298,10 +298,10 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: boolean), VALUE._col8 (type: timestamp), VALUE._col9 (type: timestamp), VALUE._col10 (type: timestamp), VALUE._col11 (type: timestamp), VALUE._col12 (type: timestamp), VALUE._col13 (type: timestamp), VALUE._col14 (t [...]
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16
- Statistics: Num rows: 52 Data size: 16436 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 52 Data size: 11916 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 52 Data size: 16436 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 52 Data size: 11916 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreSchemaInfo.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreSchemaInfo.java
index d27323a..ec1f169 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreSchemaInfo.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreSchemaInfo.java
@@ -22,6 +22,7 @@ import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
+import java.nio.file.NoSuchFileException;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
@@ -132,9 +133,11 @@ public class MetaStoreSchemaInfo implements IMetaStoreSchemaInfo {
String initScriptName = INIT_FILE_PREFIX + toVersion + "." +
dbType + SQL_FILE_EXTENSION;
// check if the file exists
- if (!(new File(getMetaStoreScriptDir() + File.separatorChar +
- initScriptName).exists())) {
- throw new HiveMetaException("Unknown version specified for initialization: " + toVersion);
+ File file = new File(getMetaStoreScriptDir() + File.separatorChar +
+ initScriptName);
+ if (!file.exists()) {
+ throw new HiveMetaException("Unknown version specified for initialization: " + toVersion,
+ new NoSuchFileException(file.getAbsolutePath()));
}
return initScriptName;
}