You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2017/11/15 05:27:12 UTC
[10/10] hive git commit: HIVE-17934 : Merging Statistics are promoted
to COMPLETE (most of the time) (Zoltan Haindrich via Ashutosh Chauhan)
HIVE-17934 : Merging Statistics are promoted to COMPLETE (most of the time) (Zoltan Haindrich via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/de78ddb7
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/de78ddb7
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/de78ddb7
Branch: refs/heads/master
Commit: de78ddb7746624912f093c54a50f5b3f6a0dd876
Parents: 3bb46de
Author: Zoltan Haindrich <ki...@rxd.hu>
Authored: Tue Oct 31 08:16:00 2017 -0700
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Tue Nov 14 21:26:48 2017 -0800
----------------------------------------------------------------------
.../results/positive/accumulo_queries.q.out | 18 +-
.../test/results/positive/hbase_queries.q.out | 18 +-
.../src/test/results/positive/hbasestats.q.out | 8 +-
.../optimizer/spark/SparkMapJoinOptimizer.java | 17 +-
.../stats/annotation/StatsRulesProcFactory.java | 76 +--
.../apache/hadoop/hive/ql/plan/Statistics.java | 66 ++-
.../apache/hadoop/hive/ql/stats/StatsUtils.java | 14 +-
.../clientpositive/lateral_view_onview2.q | 6 +
.../clientpositive/stats_empty_partition2.q | 22 +
.../clientpositive/acid_table_stats.q.out | 8 +-
.../clientpositive/alterColumnStatsPart.q.out | 16 +-
.../clientpositive/annotate_stats_part.q.out | 4 +-
.../clientpositive/auto_sortmerge_join_12.q.out | 10 +-
.../test/results/clientpositive/cbo_const.q.out | 6 +-
.../results/clientpositive/cbo_input26.q.out | 48 +-
.../clientpositive/columnstats_partlvl_dp.q.out | 20 +-
.../clientpositive/columnstats_quoting.q.out | 16 +-
.../clientpositive/columnstats_tbllvl.q.out | 8 +-
.../test/results/clientpositive/constGby.q.out | 10 +-
.../clientpositive/constant_prop_3.q.out | 30 +-
.../results/clientpositive/constprog3.q.out | 4 +-
.../clientpositive/correlationoptimizer10.q.out | 46 +-
.../clientpositive/correlationoptimizer11.q.out | 16 +-
.../clientpositive/correlationoptimizer13.q.out | 14 +-
.../clientpositive/correlationoptimizer14.q.out | 24 +-
.../clientpositive/correlationoptimizer15.q.out | 18 +-
.../clientpositive/correlationoptimizer5.q.out | 22 +-
.../clientpositive/correlationoptimizer7.q.out | 16 +-
.../clientpositive/correlationoptimizer8.q.out | 30 +-
.../clientpositive/correlationoptimizer9.q.out | 24 +-
.../test/results/clientpositive/cte_mat_5.q.out | 4 +-
.../display_colstats_tbllvl.q.out | 8 +-
.../results/clientpositive/druid_basic2.q.out | 6 +-
.../results/clientpositive/empty_join.q.out | 4 +-
.../filter_cond_pushdown_HIVE_15647.q.out | 32 +-
.../results/clientpositive/groupby_sort_6.q.out | 10 +-
.../test/results/clientpositive/having2.q.out | 46 +-
.../test/results/clientpositive/input23.q.out | 8 +-
.../test/results/clientpositive/input26.q.out | 12 +-
.../join_cond_pushdown_unqual1.q.out | 26 +-
.../join_cond_pushdown_unqual2.q.out | 16 +-
.../join_cond_pushdown_unqual3.q.out | 26 +-
.../join_cond_pushdown_unqual4.q.out | 16 +-
.../test/results/clientpositive/join_view.q.out | 6 +-
.../clientpositive/lateral_view_onview.q.out | 32 +-
.../clientpositive/lateral_view_onview2.q.out | 169 ++++++
.../list_bucket_query_oneskew_2.q.out | 16 +-
.../llap/auto_sortmerge_join_12.q.out | 16 +-
.../clientpositive/llap/constprog_dpp.q.out | 8 +-
.../llap/dynamic_semijoin_reduction.q.out | 578 +++++++++----------
.../llap/dynamic_semijoin_reduction_sw.q.out | 52 +-
.../llap/dynamic_semijoin_user_level.q.out | 118 ++--
.../llap/dynpart_sort_optimization_acid.q.out | 24 +-
.../clientpositive/llap/llap_nullscan.q.out | 6 +-
.../clientpositive/llap/mapjoin_hint.q.out | 46 +-
.../clientpositive/llap/mapreduce1.q.out | 4 +-
.../clientpositive/llap/mapreduce2.q.out | 4 +-
.../clientpositive/llap/metadataonly1.q.out | 120 ++--
.../llap/reduce_deduplicate.q.out | 6 +-
.../clientpositive/llap/semijoin_hint.q.out | 450 +++++++--------
.../clientpositive/llap/subquery_in.q.out | 14 +-
.../clientpositive/llap/subquery_multi.q.out | 12 +-
.../clientpositive/llap/subquery_null_agg.q.out | 20 +-
.../clientpositive/llap/subquery_scalar.q.out | 18 +-
.../clientpositive/llap/subquery_select.q.out | 12 +-
.../clientpositive/llap/tez_smb_empty.q.out | 20 +-
.../llap/vector_windowing_gby2.q.out | 8 +-
.../llap/vector_windowing_streaming.q.out | 12 +-
.../llap/vectorization_short_regress.q.out | 8 +-
.../materialized_view_rewrite_ssb.q.out | 24 +-
.../materialized_view_rewrite_ssb_2.q.out | 24 +-
.../results/clientpositive/nullgroup3.q.out | 32 +-
.../results/clientpositive/nullgroup5.q.out | 8 +-
.../clientpositive/partial_column_stats.q.out | 8 +-
.../clientpositive/perf/spark/query66.q.out | 48 +-
.../clientpositive/perf/spark/query99.q.out | 20 +-
.../clientpositive/position_alias_test_1.q.out | 8 +-
.../clientpositive/ppd_outer_join5.q.out | 30 +-
.../clientpositive/ppd_repeated_alias.q.out | 6 +-
.../test/results/clientpositive/row__id.q.out | 8 +-
.../test/results/clientpositive/semijoin4.q.out | 12 +-
.../spark/auto_sortmerge_join_12.q.out | 10 +-
.../spark/join_cond_pushdown_unqual1.q.out | 26 +-
.../spark/join_cond_pushdown_unqual2.q.out | 16 +-
.../spark/join_cond_pushdown_unqual3.q.out | 26 +-
.../spark/join_cond_pushdown_unqual4.q.out | 16 +-
.../clientpositive/spark/join_view.q.out | 6 +-
.../spark/optimize_nullscan.q.out | 14 +-
.../clientpositive/spark/ppd_outer_join5.q.out | 30 +-
.../results/clientpositive/spark/semijoin.q.out | 8 +-
.../clientpositive/spark/smb_mapjoin_7.q.out | 6 +-
.../spark/spark_dynamic_partition_pruning.q.out | 26 +-
...k_vectorized_dynamic_partition_pruning.q.out | 26 +-
.../clientpositive/spark/subquery_in.q.out | 14 +-
.../clientpositive/spark/subquery_multi.q.out | 38 +-
.../spark/subquery_null_agg.q.out | 20 +-
.../clientpositive/spark/subquery_scalar.q.out | 32 +-
.../clientpositive/spark/subquery_select.q.out | 24 +-
.../clientpositive/spark/union_remove_25.q.out | 20 +-
.../clientpositive/spark/union_view.q.out | 140 ++---
.../vectorization_parquet_projection.q.out | 8 +-
.../spark/vectorization_short_regress.q.out | 16 +-
.../clientpositive/stats_empty_partition2.q.out | 166 ++++++
.../clientpositive/subquery_exists_having.q.out | 8 +-
.../subquery_unqualcolumnrefs.q.out | 4 +-
.../temp_table_display_colstats_tbllvl.q.out | 8 +-
.../clientpositive/union_remove_25.q.out | 24 +-
.../results/clientpositive/union_view.q.out | 184 +++---
.../vectorization_parquet_projection.q.out | 8 +-
109 files changed, 2095 insertions(+), 1685 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/de78ddb7/accumulo-handler/src/test/results/positive/accumulo_queries.q.out
----------------------------------------------------------------------
diff --git a/accumulo-handler/src/test/results/positive/accumulo_queries.q.out b/accumulo-handler/src/test/results/positive/accumulo_queries.q.out
index b3adf4e..bd79eef 100644
--- a/accumulo-handler/src/test/results/positive/accumulo_queries.q.out
+++ b/accumulo-handler/src/test/results/positive/accumulo_queries.q.out
@@ -175,11 +175,11 @@ STAGE PLANS:
0 UDFToDouble(_col0) (type: double)
1 UDFToDouble(_col0) (type: double)
outputColumnNames: _col1, _col2
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: _col1 (type: string), _col2 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
table:
@@ -194,19 +194,19 @@ STAGE PLANS:
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE
TopN Hash Memory Usage: 0.1
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE
Limit
Number of rows: 20
- Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 200 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 200 Basic stats: PARTIAL Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -591,14 +591,14 @@ STAGE PLANS:
0 UDFToDouble(_col0) (type: double)
1 UDFToDouble(_col0) (type: double)
outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: _col0 (type: int), _col1 (type: string), UDFToInteger(_col3) (type: int)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: PARTIAL Column stats: NONE
table:
input format: org.apache.hadoop.hive.accumulo.mr.HiveAccumuloTableInputFormat
output format: org.apache.hadoop.hive.accumulo.mr.HiveAccumuloTableOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/de78ddb7/hbase-handler/src/test/results/positive/hbase_queries.q.out
----------------------------------------------------------------------
diff --git a/hbase-handler/src/test/results/positive/hbase_queries.q.out b/hbase-handler/src/test/results/positive/hbase_queries.q.out
index b2eda12..d6ec14e 100644
--- a/hbase-handler/src/test/results/positive/hbase_queries.q.out
+++ b/hbase-handler/src/test/results/positive/hbase_queries.q.out
@@ -175,11 +175,11 @@ STAGE PLANS:
0 UDFToDouble(_col0) (type: double)
1 UDFToDouble(_col0) (type: double)
outputColumnNames: _col1, _col2
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: _col1 (type: string), _col2 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
table:
@@ -194,19 +194,19 @@ STAGE PLANS:
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE
TopN Hash Memory Usage: 0.1
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE
Limit
Number of rows: 20
- Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 200 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 200 Basic stats: PARTIAL Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -594,14 +594,14 @@ STAGE PLANS:
0 UDFToDouble(_col0) (type: double)
1 UDFToDouble(_col0) (type: double)
outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: _col0 (type: int), _col1 (type: string), UDFToInteger(_col3) (type: int)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: PARTIAL Column stats: NONE
table:
input format: org.apache.hadoop.hive.hbase.HiveHBaseTableInputFormat
output format: org.apache.hadoop.hive.hbase.HiveHBaseTableOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/de78ddb7/hbase-handler/src/test/results/positive/hbasestats.q.out
----------------------------------------------------------------------
diff --git a/hbase-handler/src/test/results/positive/hbasestats.q.out b/hbase-handler/src/test/results/positive/hbasestats.q.out
index 29eefd4..5d000d2 100644
--- a/hbase-handler/src/test/results/positive/hbasestats.q.out
+++ b/hbase-handler/src/test/results/positive/hbasestats.q.out
@@ -358,20 +358,20 @@ STAGE PLANS:
aggregations: count()
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/de78ddb7/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
index 7a3fae6..8cedbe5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
@@ -34,6 +34,7 @@ import org.apache.hadoop.hive.ql.parse.spark.SparkPartitionPruningSinkOperator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
@@ -214,8 +215,7 @@ public class SparkMapJoinOptimizer implements NodeProcessor {
LOG.debug("Found a big table branch with parent operator {} and position {}", parentOp, pos);
bigTablePosition = pos;
bigTableFound = true;
- bigInputStat = new Statistics();
- bigInputStat.setDataSize(Long.MAX_VALUE);
+ bigInputStat = new Statistics(0, Long.MAX_VALUE);
} else {
// Either we've found multiple big table branches, or the current branch cannot
// be a big table branch. Disable mapjoin for these cases.
@@ -236,13 +236,20 @@ public class SparkMapJoinOptimizer implements NodeProcessor {
continue;
}
- Statistics currInputStat;
+ Statistics currInputStat = null;
if (useTsStats) {
- currInputStat = new Statistics();
// Find all root TSs and add up all data sizes
// Not adding other stats (e.g., # of rows, col stats) since only data size is used here
for (TableScanOperator root : OperatorUtils.findOperatorsUpstream(parentOp, TableScanOperator.class)) {
- currInputStat.addToDataSize(root.getStatistics().getDataSize());
+ if (currInputStat == null) {
+ try {
+ currInputStat = root.getStatistics().clone();
+ } catch (CloneNotSupportedException e) {
+ throw new RuntimeException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
+ }
+ } else {
+ currInputStat.addBasicStats(root.getStatistics());
+ }
}
} else {
currInputStat = parentOp.getStatistics();
http://git-wip-us.apache.org/repos/asf/hive/blob/de78ddb7/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index a4f60ac..86b8724 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -53,7 +53,6 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.parse.ColumnStatsList;
-import org.apache.hadoop.hive.ql.parse.JoinType;
import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.AggregationDesc;
@@ -74,6 +73,7 @@ import org.apache.hadoop.hive.ql.plan.JoinDesc;
import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.Statistics;
+import org.apache.hadoop.hive.ql.plan.Statistics.State;
import org.apache.hadoop.hive.ql.stats.StatsUtils;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
@@ -1610,9 +1610,11 @@ public class StatsRulesProcFactory {
long crossDataSize = 1;
long maxRowCount = 0;
long maxDataSize = 0;
+ State statsState = State.NONE;
for (Operator<? extends OperatorDesc> op : parents) {
Statistics ps = op.getStatistics();
+ statsState = Statistics.inferColumnStatsState(statsState, ps.getBasicStatsState());
long rowCount = ps.getNumRows();
long dataSize = ps.getDataSize();
// Update cross size
@@ -1648,13 +1650,18 @@ public class StatsRulesProcFactory {
newNumRows = crossRowCount;
newDataSize = crossDataSize;
} else {
- newNumRows = StatsUtils.safeMult(StatsUtils.safeMult(maxRowCount, (numParents - 1)), joinFactor);
- newDataSize = StatsUtils.safeMult(StatsUtils.safeMult(maxDataSize, (numParents - 1)), joinFactor);
+ if (numParents > 1) {
+ newNumRows = StatsUtils.safeMult(StatsUtils.safeMult(maxRowCount, (numParents - 1)), joinFactor);
+ newDataSize = StatsUtils.safeMult(StatsUtils.safeMult(maxDataSize, (numParents - 1)), joinFactor);
+ } else {
+ // MUX operator with 1 parent
+ newNumRows = StatsUtils.safeMult(maxRowCount, joinFactor);
+ newDataSize = StatsUtils.safeMult(maxDataSize, joinFactor);
+ }
}
- Statistics wcStats = new Statistics();
- wcStats.setNumRows(newNumRows);
- wcStats.setDataSize(newDataSize);
+ Statistics wcStats = new Statistics(newNumRows, newDataSize);
+ wcStats.setBasicStatsState(statsState);
// evaluate filter expression and update statistics
if (jop.getConf().getNoOuterJoin() &&
@@ -2083,6 +2090,7 @@ public class StatsRulesProcFactory {
newDataSize = StatsUtils.safeAdd(newDataSize, StatsUtils.safeMult(restColumnsDefaultSize, newNumRows));
}
stats.setDataSize(StatsUtils.getMaxIfOverflow(newDataSize));
+ stats.setBasicStatsState(State.COMPLETE);
}
private long computeFinalRowCount(List<Long> rowCountParents, long interimRowCount,
@@ -2260,17 +2268,9 @@ public class StatsRulesProcFactory {
// in the absence of column statistics, compute data size based on
// based on average row size
- Statistics wcStats = parentStats.clone();
limit = StatsUtils.getMaxIfOverflow(limit);
- if (limit <= parentStats.getNumRows()) {
- long numRows = limit;
- long avgRowSize = parentStats.getAvgRowSize();
- long dataSize = StatsUtils.safeMult(avgRowSize, limit);
- wcStats.setNumRows(numRows);
- wcStats.setDataSize(dataSize);
- }
+ Statistics wcStats = parentStats.scaleToRowCount(limit);
lop.setStatistics(wcStats);
-
if (LOG.isDebugEnabled()) {
LOG.debug("[1] STATS-" + lop.toString() + ": " + wcStats.extendedToString());
}
@@ -2366,30 +2366,34 @@ public class StatsRulesProcFactory {
if (conf != null) {
Statistics stats = conf.getStatistics();
- if (stats == null) {
- if (op.getParentOperators() != null) {
-
- // if parent statistics is null then that branch of the tree is not
- // walked yet. don't update the stats until all branches are walked
- if (isAllParentsContainStatistics(op)) {
- stats = new Statistics();
- for (Operator<? extends OperatorDesc> parent : op.getParentOperators()) {
- if (parent.getStatistics() != null) {
- Statistics parentStats = parent.getStatistics();
- stats.addToNumRows(parentStats.getNumRows());
- stats.addToDataSize(parentStats.getDataSize());
- stats.updateColumnStatsState(parentStats.getColumnStatsState());
- List<ColStatistics> colStats = StatsUtils.getColStatisticsFromExprMap(hconf,
- parentStats, op.getColumnExprMap(), op.getSchema());
- stats.addToColumnStats(colStats);
- op.getConf().setStatistics(stats);
-
- if (LOG.isDebugEnabled()) {
- LOG.debug("[0] STATS-" + op.toString() + ": " + stats.extendedToString());
- }
+ if (stats == null && op.getParentOperators() != null) {
+
+ // if parent statistics is null then that branch of the tree is not
+ // walked yet. don't update the stats until all branches are walked
+ if (isAllParentsContainStatistics(op)) {
+
+ for (Operator<? extends OperatorDesc> parent : op.getParentOperators()) {
+ Statistics parentStats = parent.getStatistics();
+
+ if (stats == null) {
+ try {
+ stats = parentStats.clone();
+ } catch (CloneNotSupportedException e) {
+ throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
}
+ } else {
+ stats.addBasicStats(parentStats);
+ }
+
+ stats.updateColumnStatsState(parentStats.getColumnStatsState());
+ List<ColStatistics> colStats = StatsUtils.getColStatisticsFromExprMap(hconf, parentStats, op.getColumnExprMap(), op.getSchema());
+ stats.addToColumnStats(colStats);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("[0] STATS-" + op.toString() + ": " + stats.extendedToString());
}
}
+ op.getConf().setStatistics(stats);
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/de78ddb7/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
index 8ffb4ce..82df960 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
@@ -23,6 +23,7 @@ import java.util.List;
import java.util.Map;
import org.apache.hadoop.hive.ql.plan.Explain.Level;
+import org.apache.hadoop.hive.ql.stats.StatsUtils;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
@@ -35,7 +36,11 @@ import com.google.common.collect.Maps;
public class Statistics implements Serializable {
public enum State {
- COMPLETE, PARTIAL, NONE
+ NONE, PARTIAL, COMPLETE;
+
+ boolean morePreciseThan(State other) {
+ return ordinal() >= other.ordinal();
+ }
}
private long numRows;
@@ -46,16 +51,17 @@ public class Statistics implements Serializable {
private State columnStatsState;
public Statistics() {
- this(0, 0, -1);
+ this(0, 0);
}
- public Statistics(long nr, long ds, long rnr) {
- this.setNumRows(nr);
- this.setDataSize(ds);
- this.setRunTimeNumRows(rnr);
- this.basicStatsState = State.NONE;
- this.columnStats = null;
- this.columnStatsState = State.NONE;
+ public Statistics(long nr, long ds) {
+ numRows = nr;
+ dataSize = ds;
+ runTimeNumRows = -1;
+ columnStats = null;
+ columnStatsState = State.NONE;
+
+ updateBasicStatsState();
}
public long getNumRows() {
@@ -64,7 +70,9 @@ public class Statistics implements Serializable {
public void setNumRows(long numRows) {
this.numRows = numRows;
- updateBasicStatsState();
+ if (dataSize == 0) {
+ updateBasicStatsState();
+ }
}
public long getDataSize() {
@@ -73,7 +81,9 @@ public class Statistics implements Serializable {
public void setDataSize(long dataSize) {
this.dataSize = dataSize;
- updateBasicStatsState();
+ if (dataSize == 0) {
+ updateBasicStatsState();
+ }
}
private void updateBasicStatsState() {
@@ -91,7 +101,10 @@ public class Statistics implements Serializable {
}
public void setBasicStatsState(State basicStatsState) {
- this.basicStatsState = basicStatsState;
+ updateBasicStatsState();
+ if (this.basicStatsState.morePreciseThan(basicStatsState)) {
+ this.basicStatsState = basicStatsState;
+ }
}
public State getColumnStatsState() {
@@ -155,7 +168,8 @@ public class Statistics implements Serializable {
@Override
public Statistics clone() throws CloneNotSupportedException {
- Statistics clone = new Statistics(numRows, dataSize, runTimeNumRows);
+ Statistics clone = new Statistics(numRows, dataSize);
+ clone.setRunTimeNumRows(runTimeNumRows);
clone.setBasicStatsState(basicStatsState);
clone.setColumnStatsState(columnStatsState);
if (columnStats != null) {
@@ -168,14 +182,15 @@ public class Statistics implements Serializable {
return clone;
}
- public void addToNumRows(long nr) {
- numRows += nr;
- updateBasicStatsState();
+ public void addBasicStats(Statistics stats) {
+ dataSize += stats.dataSize;
+ numRows += stats.numRows;
+ basicStatsState = inferColumnStatsState(basicStatsState, stats.basicStatsState);
}
+ @Deprecated
public void addToDataSize(long rds) {
dataSize += rds;
- updateBasicStatsState();
}
public void setColumnStats(Map<String, ColStatistics> colStats) {
@@ -284,4 +299,21 @@ public class Statistics implements Serializable {
public void setRunTimeNumRows(long runTimeNumRows) {
this.runTimeNumRows = runTimeNumRows;
}
+
+ public Statistics scaleToRowCount(long newRowCount) {
+ Statistics ret;
+ try {
+ ret = clone();
+ } catch (CloneNotSupportedException e) {
+ // FIXME: remove the Cloneable usage
+ return new Statistics(0,0);
+ }
+ if(numRows == 0 || newRowCount >= numRows) {
+ return ret;
+ }
+ // FIXME: using real scaling by new/old ratio might yield better results?
+ ret.numRows = newRowCount;
+ ret.dataSize = StatsUtils.safeMult(getAvgRowSize(), newRowCount);
+ return ret;
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/de78ddb7/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index ce7c96c..ed628ae 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -330,7 +330,7 @@ public class StatsUtils {
List<String> referencedColumns, boolean fetchColStats, boolean failIfCacheMiss)
throws HiveException {
- Statistics stats = new Statistics();
+ Statistics stats = null;
float deserFactor =
HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVE_STATS_DESERIALIZATION_FACTOR);
@@ -342,7 +342,6 @@ public class StatsUtils {
// we would like to avoid file system calls if it too expensive
long ds = shouldEstimateStats? getDataSize(conf, table): getRawDataSize(table);
long nr = getNumRows(conf, schema, neededColumns, table, ds);
- stats.setNumRows(nr);
List<ColStatistics> colStats = Lists.newArrayList();
if (fetchColStats) {
colStats = getTableColumnStats(table, schema, neededColumns, colStatsCache);
@@ -356,7 +355,7 @@ public class StatsUtils {
long betterDS = getDataSizeFromColumnStats(nr, colStats);
ds = (betterDS < 1 || colStats.isEmpty()) ? ds : betterDS;
}
- stats.setDataSize(ds);
+ stats = new Statistics(nr, ds);
// infer if any column can be primary key based on column statistics
inferAndSetPrimaryKey(stats.getNumRows(), colStats);
@@ -405,8 +404,7 @@ public class StatsUtils {
if (nr == 0) {
nr = 1;
}
- stats.addToNumRows(nr);
- stats.addToDataSize(ds);
+ stats = new Statistics(nr, ds);
// if at least a partition does not contain row count then mark basic stats state as PARTIAL
if (containsNonPositives(rowCounts) &&
@@ -488,6 +486,7 @@ public class StatsUtils {
// add partition column stats
addPartitionColumnStats(conf, partitionColsToRetrieve, schema, table, partList, columnStats);
+ // FIXME: this add seems suspicious...10 lines below, the value returned by this method is used as betterDS
stats.addToDataSize(getDataSizeFromColumnStats(nr, columnStats));
stats.updateColumnStatsState(deriveStatType(columnStats, referencedColumns));
@@ -525,6 +524,11 @@ public class StatsUtils {
}
}
+ if(rowCounts.size() == 0 ) {
+ // all partitions are filtered by partition pruning
+ stats.setBasicStatsState(State.COMPLETE);
+ }
+
// This block exists for debugging purposes: we want to check whether
// the col stats cache is working properly and we are retrieving the
// stats from metastore only once.
http://git-wip-us.apache.org/repos/asf/hive/blob/de78ddb7/ql/src/test/queries/clientpositive/lateral_view_onview2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/lateral_view_onview2.q b/ql/src/test/queries/clientpositive/lateral_view_onview2.q
new file mode 100644
index 0000000..c13f754
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/lateral_view_onview2.q
@@ -0,0 +1,6 @@
+CREATE TABLE lv_table( c1 STRING, c2 ARRAY<INT>, c3 INT, c4 CHAR(1));
+INSERT OVERWRITE TABLE lv_table SELECT 'abc ', array(1,2,3), 100, 't' FROM src;
+
+CREATE OR REPLACE VIEW lv_view AS SELECT * FROM lv_table;
+
+EXPLAIN SELECT myTable.myCol, myTable2.myCol2 FROM lv_view LATERAL VIEW explode(array(1,2,3)) myTable AS myCol LATERAL VIEW explode(array('a', 'b', 'c')) myTable2 AS myCol2 LIMIT 9;
http://git-wip-us.apache.org/repos/asf/hive/blob/de78ddb7/ql/src/test/queries/clientpositive/stats_empty_partition2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/stats_empty_partition2.q b/ql/src/test/queries/clientpositive/stats_empty_partition2.q
new file mode 100644
index 0000000..5afab57
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/stats_empty_partition2.q
@@ -0,0 +1,22 @@
+set hive.explain.user=false;
+
+drop table if exists p1;
+drop table if exists t;
+
+create table t (a int);
+insert into t values (1);
+
+create table p1 (a int) partitioned by (p int);
+
+insert into p1 partition (p=1) values (1);
+insert into p1 partition (p=2) values (1);
+
+truncate table p1;
+
+insert into p1 partition (p=1) values (1);
+
+explain
+select * from p1 join t on (t.a=p1.a);
+
+describe formatted p1;
+
http://git-wip-us.apache.org/repos/asf/hive/blob/de78ddb7/ql/src/test/results/clientpositive/acid_table_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/acid_table_stats.q.out b/ql/src/test/results/clientpositive/acid_table_stats.q.out
index 351ff0d..d0fbcac 100644
--- a/ql/src/test/results/clientpositive/acid_table_stats.q.out
+++ b/ql/src/test/results/clientpositive/acid_table_stats.q.out
@@ -140,20 +140,20 @@ STAGE PLANS:
aggregations: count()
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE
value expressions: _col0 (type: bigint)
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/de78ddb7/ql/src/test/results/clientpositive/alterColumnStatsPart.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/alterColumnStatsPart.q.out b/ql/src/test/results/clientpositive/alterColumnStatsPart.q.out
index 858e16f..fd87060 100644
--- a/ql/src/test/results/clientpositive/alterColumnStatsPart.q.out
+++ b/ql/src/test/results/clientpositive/alterColumnStatsPart.q.out
@@ -346,20 +346,20 @@ STAGE PLANS:
aggregations: max(a)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 4 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 4 Basic stats: PARTIAL Column stats: NONE
value expressions: _col0 (type: int)
Reduce Operator Tree:
Group By Operator
aggregations: max(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 4 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 4 Basic stats: PARTIAL Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -408,20 +408,20 @@ STAGE PLANS:
aggregations: max(a)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 4 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 4 Basic stats: PARTIAL Column stats: NONE
value expressions: _col0 (type: int)
Reduce Operator Tree:
Group By Operator
aggregations: max(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 4 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 4 Basic stats: PARTIAL Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/de78ddb7/ql/src/test/results/clientpositive/annotate_stats_part.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_part.q.out b/ql/src/test/results/clientpositive/annotate_stats_part.q.out
index 3a94a6a..fed2a65 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_part.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_part.q.out
@@ -142,11 +142,11 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 7 Data size: 3338 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 7 Data size: 3338 Basic stats: PARTIAL Column stats: PARTIAL
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 7 Data size: 2660 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 7 Data size: 3338 Basic stats: PARTIAL Column stats: PARTIAL
ListSink
PREHOOK: query: explain select * from loc_orc where year='2001'
http://git-wip-us.apache.org/repos/asf/hive/blob/de78ddb7/ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out
index 7875e96..3d0559a 100644
--- a/ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out
+++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out
@@ -388,16 +388,16 @@ STAGE PLANS:
0
1
Position of Big Table: 0
- Statistics: Num rows: 255 Data size: 69177 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 255 Data size: 69177 Basic stats: PARTIAL Column stats: NONE
Group By Operator
aggregations: count()
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
null sort order:
sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE
tag: -1
value expressions: _col0 (type: bigint)
auto parallelism: false
@@ -611,13 +611,13 @@ STAGE PLANS:
aggregations: count(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/de78ddb7/ql/src/test/results/clientpositive/cbo_const.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/cbo_const.q.out b/ql/src/test/results/clientpositive/cbo_const.q.out
index e9f885b..cfc7f52 100644
--- a/ql/src/test/results/clientpositive/cbo_const.q.out
+++ b/ql/src/test/results/clientpositive/cbo_const.q.out
@@ -312,14 +312,14 @@ STAGE PLANS:
0 _col3 (type: string)
1 _col0 (type: string)
outputColumnNames: _col1, _col2, _col4
- Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 302 Data size: 3213 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: _col2 (type: string), _col4 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 302 Data size: 3213 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 302 Data size: 3213 Basic stats: PARTIAL Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/de78ddb7/ql/src/test/results/clientpositive/cbo_input26.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/cbo_input26.q.out b/ql/src/test/results/clientpositive/cbo_input26.q.out
index 77fc194..f4d78a4 100644
--- a/ql/src/test/results/clientpositive/cbo_input26.q.out
+++ b/ql/src/test/results/clientpositive/cbo_input26.q.out
@@ -59,28 +59,28 @@ STAGE PLANS:
Map Operator Tree:
TableScan
Union
- Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 50 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 50 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 50 Basic stats: PARTIAL Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
TableScan
Union
- Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 50 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 50 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 50 Basic stats: PARTIAL Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -215,28 +215,28 @@ STAGE PLANS:
Map Operator Tree:
TableScan
Union
- Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 50 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: '2008-04-08' (type: string), _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 50 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 50 Basic stats: PARTIAL Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
TableScan
Union
- Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 50 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: '2008-04-08' (type: string), _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 50 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 50 Basic stats: PARTIAL Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -371,28 +371,28 @@ STAGE PLANS:
Map Operator Tree:
TableScan
Union
- Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 50 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: '2008-04-08' (type: string), _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 50 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 50 Basic stats: PARTIAL Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
TableScan
Union
- Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 50 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: '2008-04-08' (type: string), _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 50 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 50 Basic stats: PARTIAL Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -526,28 +526,28 @@ STAGE PLANS:
Map Operator Tree:
TableScan
Union
- Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 50 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: _col0 (type: string), '2008-04-08' (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 50 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 50 Basic stats: PARTIAL Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
TableScan
Union
- Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 50 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: _col0 (type: string), '2008-04-08' (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 50 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 50 Basic stats: PARTIAL Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/de78ddb7/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
index 414b715..524af1c 100644
--- a/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
+++ b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
@@ -92,12 +92,12 @@ STAGE PLANS:
keys: 4000.0 (type: double), country (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 64 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
key expressions: 4000.0 (type: double), _col1 (type: string)
sort order: ++
Map-reduce partition columns: 4000.0 (type: double), _col1 (type: string)
- Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 64 Basic stats: PARTIAL Column stats: NONE
value expressions: _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
Reduce Operator Tree:
Group By Operator
@@ -105,14 +105,14 @@ STAGE PLANS:
keys: 4000.0 (type: double), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 64 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), 4000.0 (type: double), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 64 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 64 Basic stats: PARTIAL Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -337,12 +337,12 @@ STAGE PLANS:
keys: employeesalary (type: double), country (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 27 Data size: 206 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 27 Data size: 206 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: double), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: double), _col1 (type: string)
- Statistics: Num rows: 27 Data size: 206 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 27 Data size: 206 Basic stats: PARTIAL Column stats: NONE
value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
Reduce Operator Tree:
Group By Operator
@@ -350,14 +350,14 @@ STAGE PLANS:
keys: KEY._col0 (type: double), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: double), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: PARTIAL Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/de78ddb7/ql/src/test/results/clientpositive/columnstats_quoting.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/columnstats_quoting.q.out b/ql/src/test/results/clientpositive/columnstats_quoting.q.out
index 683c1e2..00fd199 100644
--- a/ql/src/test/results/clientpositive/columnstats_quoting.q.out
+++ b/ql/src/test/results/clientpositive/columnstats_quoting.q.out
@@ -33,20 +33,20 @@ STAGE PLANS:
aggregations: compute_stats(user id, 'hll'), compute_stats(user name, 'hll')
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 864 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 864 Basic stats: PARTIAL Column stats: NONE
value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -93,20 +93,20 @@ STAGE PLANS:
aggregations: compute_stats(user id, 'hll')
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 424 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 424 Basic stats: PARTIAL Column stats: NONE
value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 440 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 440 Basic stats: PARTIAL Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/de78ddb7/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out b/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out
index a2c6ead..6050694 100644
--- a/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out
+++ b/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out
@@ -390,20 +390,20 @@ STAGE PLANS:
aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll'), compute_stats(d, 'hll'), compute_stats(e, 'hll')
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 1 Data size: 1848 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 1848 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 1848 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 1848 Basic stats: PARTIAL Column stats: NONE
value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,counttrues:bigint,countfalses:bigint,countnulls:bigint>), _col4 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint>)
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 1 Data size: 1880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 1880 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 1880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 1880 Basic stats: PARTIAL Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/de78ddb7/ql/src/test/results/clientpositive/constGby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/constGby.q.out b/ql/src/test/results/clientpositive/constGby.q.out
index c633624..ec57718 100644
--- a/ql/src/test/results/clientpositive/constGby.q.out
+++ b/ql/src/test/results/clientpositive/constGby.q.out
@@ -46,12 +46,12 @@ STAGE PLANS:
keys: 1 (type: int)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE
value expressions: _col1 (type: bigint)
Reduce Operator Tree:
Group By Operator
@@ -59,14 +59,14 @@ STAGE PLANS:
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE
Select Operator
expressions: _col1 (type: bigint)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/de78ddb7/ql/src/test/results/clientpositive/constant_prop_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/constant_prop_3.q.out b/ql/src/test/results/clientpositive/constant_prop_3.q.out
index cba4744..b754f8e 100644
--- a/ql/src/test/results/clientpositive/constant_prop_3.q.out
+++ b/ql/src/test/results/clientpositive/constant_prop_3.q.out
@@ -230,7 +230,7 @@ STAGE PLANS:
TableScan
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: PARTIAL Column stats: NONE
value expressions: _col0 (type: bigint), _col1 (type: bigint)
Reduce Operator Tree:
Join Operator
@@ -240,7 +240,7 @@ STAGE PLANS:
0
1
outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7
- Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 17 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
table:
@@ -256,7 +256,7 @@ STAGE PLANS:
key expressions: _col1 (type: int)
sort order: +
Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 17 Basic stats: PARTIAL Column stats: NONE
value expressions: _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: bigint), _col7 (type: bigint)
TableScan
Reduce Output Operator
@@ -273,20 +273,20 @@ STAGE PLANS:
0 _col1 (type: int)
1 _col0 (type: int)
outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7, _col9
- Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 18 Basic stats: PARTIAL Column stats: NONE
Filter Operator
predicate: ((_col6 = 0) or (_col9 is null and _col1 is not null and (_col7 >= _col6))) (type: boolean)
- Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 18 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: _col1 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: int)
outputColumnNames: _col1, _col3, _col4, _col5
- Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 18 Basic stats: PARTIAL Column stats: NONE
Group By Operator
aggregations: count(DISTINCT _col1)
keys: _col3 (type: string), _col4 (type: string), _col5 (type: int), _col1 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 18 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
table:
@@ -302,14 +302,14 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int)
sort order: ++++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int)
- Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 18 Basic stats: PARTIAL Column stats: NONE
Reduce Operator Tree:
Group By Operator
aggregations: count(DISTINCT KEY._col3:0._col0)
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 18 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
table:
@@ -324,15 +324,15 @@ STAGE PLANS:
Reduce Output Operator
key expressions: _col3 (type: bigint), _col0 (type: string), _col1 (type: string), _col2 (type: int)
sort order: -+++
- Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 18 Basic stats: PARTIAL Column stats: NONE
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey0 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 18 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 18 Basic stats: PARTIAL Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -355,17 +355,17 @@ STAGE PLANS:
aggregations: count(), count(s_suppkey)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: PARTIAL Column stats: NONE
value expressions: _col0 (type: bigint), _col1 (type: bigint)
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0), count(VALUE._col1)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
table:
http://git-wip-us.apache.org/repos/asf/hive/blob/de78ddb7/ql/src/test/results/clientpositive/constprog3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/constprog3.q.out b/ql/src/test/results/clientpositive/constprog3.q.out
index f54168d..9fc06d6 100644
--- a/ql/src/test/results/clientpositive/constprog3.q.out
+++ b/ql/src/test/results/clientpositive/constprog3.q.out
@@ -66,10 +66,10 @@ STAGE PLANS:
0
1
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 1 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 1 Basic stats: PARTIAL Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/de78ddb7/ql/src/test/results/clientpositive/correlationoptimizer10.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/correlationoptimizer10.q.out b/ql/src/test/results/clientpositive/correlationoptimizer10.q.out
index a03acd3..b5d2fe7 100644
--- a/ql/src/test/results/clientpositive/correlationoptimizer10.q.out
+++ b/ql/src/test/results/clientpositive/correlationoptimizer10.q.out
@@ -267,17 +267,17 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 605 Data size: 6263 Basic stats: COMPLETE Column stats: NONE
Mux Operator
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 605 Data size: 6263 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
keys: _col0 (type: string)
mode: complete
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 302 Data size: 3126 Basic stats: COMPLETE Column stats: NONE
Mux Operator
- Statistics: Num rows: 551 Data size: 5694 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 852 Data size: 8820 Basic stats: COMPLETE Column stats: NONE
Join Operator
condition map:
Left Semi Join 0 to 1
@@ -285,16 +285,16 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 937 Data size: 9702 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 937 Data size: 9702 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Mux Operator
- Statistics: Num rows: 551 Data size: 5694 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 852 Data size: 8820 Basic stats: COMPLETE Column stats: NONE
Join Operator
condition map:
Left Semi Join 0 to 1
@@ -302,10 +302,10 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 937 Data size: 9702 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 937 Data size: 9702 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -593,7 +593,7 @@ STAGE PLANS:
Demux Operator
Statistics: Num rows: 112 Data size: 1183 Basic stats: COMPLETE Column stats: NONE
Mux Operator
- Statistics: Num rows: 112 Data size: 1183 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 235 Data size: 2484 Basic stats: COMPLETE Column stats: NONE
Join Operator
condition map:
Left Semi Join 0 to 1
@@ -601,10 +601,10 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 258 Data size: 2732 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 258 Data size: 2732 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -616,9 +616,9 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 123 Data size: 1301 Basic stats: COMPLETE Column stats: NONE
Mux Operator
- Statistics: Num rows: 112 Data size: 1183 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 235 Data size: 2484 Basic stats: COMPLETE Column stats: NONE
Join Operator
condition map:
Left Semi Join 0 to 1
@@ -626,10 +626,10 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 258 Data size: 2732 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 258 Data size: 2732 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -925,7 +925,7 @@ STAGE PLANS:
Demux Operator
Statistics: Num rows: 165 Data size: 1752 Basic stats: COMPLETE Column stats: NONE
Mux Operator
- Statistics: Num rows: 165 Data size: 1752 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 346 Data size: 3679 Basic stats: COMPLETE Column stats: NONE
Join Operator
condition map:
Left Semi Join 0 to 1
@@ -933,10 +933,10 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 380 Data size: 4046 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 380 Data size: 4046 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -948,9 +948,9 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 181 Data size: 1927 Basic stats: COMPLETE Column stats: NONE
Mux Operator
- Statistics: Num rows: 165 Data size: 1752 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 346 Data size: 3679 Basic stats: COMPLETE Column stats: NONE
Join Operator
condition map:
Left Semi Join 0 to 1
@@ -958,10 +958,10 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 380 Data size: 4046 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 380 Data size: 4046 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat