You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kg...@apache.org on 2017/12/05 09:59:33 UTC
[1/4] hive git commit: HIVE-18207: Fix the test failure for TestCliDriver#vector_complex_join (Colin Ma via Zoltan Haindrich)
Repository: hive
Updated Branches:
refs/heads/master d041cc441 -> f63124188
HIVE-18207: Fix the test failure for TestCliDriver#vector_complex_join (Colin Ma via Zoltan Haindrich)
Signed-off-by: Zoltan Haindrich <ki...@rxd.hu>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8ac91e73
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8ac91e73
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8ac91e73
Branch: refs/heads/master
Commit: 8ac91e7317eebb3f36c12b84fa2b447de827dffb
Parents: d041cc4
Author: Colin Ma <ju...@intel.com>
Authored: Tue Dec 5 10:40:43 2017 +0100
Committer: Zoltan Haindrich <ki...@rxd.hu>
Committed: Tue Dec 5 10:40:43 2017 +0100
----------------------------------------------------------------------
ql/src/test/results/clientpositive/vector_complex_join.q.out | 4 ++++
1 file changed, 4 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/8ac91e73/ql/src/test/results/clientpositive/vector_complex_join.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_complex_join.q.out b/ql/src/test/results/clientpositive/vector_complex_join.q.out
index 7677697..2832096 100644
--- a/ql/src/test/results/clientpositive/vector_complex_join.q.out
+++ b/ql/src/test/results/clientpositive/vector_complex_join.q.out
@@ -229,6 +229,8 @@ STAGE PLANS:
0 a (type: int)
1 a[1] (type: int)
Map Join Vectorization:
+ bigTableKeyExpressions: ListIndexColScalar(col 0:array<int>, col 1:int) -> 3:int
+ bigTableValueExpressions: col 0:array<int>, col 1:int
className: VectorMapJoinOperator
native: false
nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
@@ -342,6 +344,8 @@ STAGE PLANS:
0 a (type: int)
1 a[index] (type: int)
Map Join Vectorization:
+ bigTableKeyExpressions: ListIndexColColumn(col 0:array<int>, col 1:int) -> 3:int
+ bigTableValueExpressions: col 0:array<int>, col 1:int
className: VectorMapJoinOperator
native: false
nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
[3/4] hive git commit: HIVE-18036: Stats: Remove usage of clone() methods (Bertalan Kondrat via Zoltan Haindrich)
Posted by kg...@apache.org.
HIVE-18036: Stats: Remove usage of clone() methods (Bertalan Kondrat via Zoltan Haindrich)
Signed-off-by: Zoltan Haindrich <ki...@rxd.hu>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/63f2ec19
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/63f2ec19
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/63f2ec19
Branch: refs/heads/master
Commit: 63f2ec1902016537fcf83262bfcda1d604b236d8
Parents: c673041
Author: Bertalan Kondrat <kb...@gmail.com>
Authored: Tue Dec 5 10:44:34 2017 +0100
Committer: Zoltan Haindrich <ki...@rxd.hu>
Committed: Tue Dec 5 10:44:34 2017 +0100
----------------------------------------------------------------------
.../org/apache/hadoop/hive/ql/ErrorMsg.java | 2 -
.../optimizer/spark/SparkMapJoinOptimizer.java | 4 -
.../stats/annotation/StatsRulesProcFactory.java | 557 +++++++++----------
.../hadoop/hive/ql/plan/ColStatistics.java | 2 +-
.../apache/hadoop/hive/ql/plan/Statistics.java | 9 +-
.../apache/hadoop/hive/ql/stats/StatsUtils.java | 21 +-
6 files changed, 262 insertions(+), 333 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/63f2ec19/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index 2f7284f..6b949d2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -527,8 +527,6 @@ public enum ErrorMsg {
COLUMNSTATSCOLLECTOR_INVALID_COLUMN(30012, "Column statistics are not supported "
+ "for partition columns"),
- STATISTICS_CLONING_FAILED(30013, "Cloning of statistics failed"),
-
STATSAGGREGATOR_SOURCETASK_NULL(30014, "SourceTask of StatsTask should not be null"),
STATSAGGREGATOR_CONNECTION_ERROR(30015,
"Stats aggregator of type {0} cannot be connected to", true),
http://git-wip-us.apache.org/repos/asf/hive/blob/63f2ec19/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
index 8cedbe5..8425911 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
@@ -242,11 +242,7 @@ public class SparkMapJoinOptimizer implements NodeProcessor {
// Not adding other stats (e.g., # of rows, col stats) since only data size is used here
for (TableScanOperator root : OperatorUtils.findOperatorsUpstream(parentOp, TableScanOperator.class)) {
if (currInputStat == null) {
- try {
currInputStat = root.getStatistics().clone();
- } catch (CloneNotSupportedException e) {
- throw new RuntimeException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
- }
} else {
currInputStat.addBasicStats(root.getStatistics());
}
http://git-wip-us.apache.org/repos/asf/hive/blob/63f2ec19/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index 86b8724..fcfdce9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -138,8 +138,6 @@ public class StatsRulesProcFactory {
LOG.debug("[0] STATS-" + tsop.toString() + " (" + table.getTableName() + "): " +
stats.extendedToString());
}
- } catch (CloneNotSupportedException e) {
- throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
} catch (HiveException e) {
LOG.debug("Failed to retrieve stats ",e);
throw new SemanticException(e);
@@ -177,41 +175,33 @@ public class StatsRulesProcFactory {
Statistics stats = null;
if (parentStats != null) {
- try {
- stats = parentStats.clone();
- } catch (CloneNotSupportedException e) {
- throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
- }
+ stats = parentStats.clone();
}
- try {
- if (satisfyPrecondition(parentStats)) {
- // this will take care of mapping between input column names and output column names. The
- // returned column stats will have the output column names.
- List<ColStatistics> colStats = StatsUtils.getColStatisticsFromExprMap(conf, parentStats,
- sop.getColumnExprMap(), sop.getSchema());
- stats.setColumnStats(colStats);
- // in case of select(*) the data size does not change
- if (!sop.getConf().isSelectStar() && !sop.getConf().isSelStarNoCompute()) {
- long dataSize = StatsUtils.getDataSizeFromColumnStats(stats.getNumRows(), colStats);
- stats.setDataSize(dataSize);
- }
- sop.setStatistics(stats);
+ if (satisfyPrecondition(parentStats)) {
+ // this will take care of mapping between input column names and output column names. The
+ // returned column stats will have the output column names.
+ List<ColStatistics> colStats = StatsUtils.getColStatisticsFromExprMap(conf, parentStats,
+ sop.getColumnExprMap(), sop.getSchema());
+ stats.setColumnStats(colStats);
+ // in case of select(*) the data size does not change
+ if (!sop.getConf().isSelectStar() && !sop.getConf().isSelStarNoCompute()) {
+ long dataSize = StatsUtils.getDataSizeFromColumnStats(stats.getNumRows(), colStats);
+ stats.setDataSize(dataSize);
+ }
+ sop.setStatistics(stats);
- if (LOG.isDebugEnabled()) {
- LOG.debug("[0] STATS-" + sop.toString() + ": " + stats.extendedToString());
- }
- } else {
- if (parentStats != null) {
- sop.setStatistics(parentStats.clone());
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("[0] STATS-" + sop.toString() + ": " + stats.extendedToString());
+ }
+ } else {
+ if (parentStats != null) {
+ sop.setStatistics(parentStats.clone());
- if (LOG.isDebugEnabled()) {
- LOG.debug("[1] STATS-" + sop.toString() + ": " + parentStats.extendedToString());
- }
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("[1] STATS-" + sop.toString() + ": " + parentStats.extendedToString());
}
}
- } catch (CloneNotSupportedException e) {
- throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
}
return null;
}
@@ -276,51 +266,48 @@ public class StatsRulesProcFactory {
neededCols = tsop.getNeededColumns();
}
- try {
- if (parentStats != null) {
- ExprNodeDesc pred = fop.getConf().getPredicate();
- // evaluate filter expression and update statistics
- long newNumRows = evaluateExpression(parentStats, pred, aspCtx,
- neededCols, fop, parentStats.getNumRows());
- Statistics st = parentStats.clone();
-
- if (satisfyPrecondition(parentStats)) {
-
- // update statistics based on column statistics.
- // OR conditions keeps adding the stats independently, this may
- // result in number of rows getting more than the input rows in
- // which case stats need not be updated
- if (newNumRows <= parentStats.getNumRows()) {
- updateStats(st, newNumRows, true, fop);
- }
+ if (parentStats != null) {
+ ExprNodeDesc pred = fop.getConf().getPredicate();
- if (LOG.isDebugEnabled()) {
- LOG.debug("[0] STATS-" + fop.toString() + ": " + st.extendedToString());
- }
- } else {
+ // evaluate filter expression and update statistics
+ long newNumRows = evaluateExpression(parentStats, pred, aspCtx,
+ neededCols, fop, parentStats.getNumRows());
+ Statistics st = parentStats.clone();
- // update only the basic statistics in the absence of column statistics
- if (newNumRows <= parentStats.getNumRows()) {
- updateStats(st, newNumRows, false, fop);
- }
+ if (satisfyPrecondition(parentStats)) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("[1] STATS-" + fop.toString() + ": " + st.extendedToString());
- }
+ // update statistics based on column statistics.
+ // OR conditions keeps adding the stats independently, this may
+ // result in number of rows getting more than the input rows in
+ // which case stats need not be updated
+ if (newNumRows <= parentStats.getNumRows()) {
+ updateStats(st, newNumRows, true, fop);
+ }
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("[0] STATS-" + fop.toString() + ": " + st.extendedToString());
+ }
+ } else {
+
+ // update only the basic statistics in the absence of column statistics
+ if (newNumRows <= parentStats.getNumRows()) {
+ updateStats(st, newNumRows, false, fop);
+ }
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("[1] STATS-" + fop.toString() + ": " + st.extendedToString());
}
- fop.setStatistics(st);
- aspCtx.setAndExprStats(null);
}
- } catch (CloneNotSupportedException e) {
- throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
+ fop.setStatistics(st);
+ aspCtx.setAndExprStats(null);
}
return null;
}
protected long evaluateExpression(Statistics stats, ExprNodeDesc pred,
AnnotateStatsProcCtx aspCtx, List<String> neededCols,
- Operator<?> op, long currNumRows) throws CloneNotSupportedException, SemanticException {
+ Operator<?> op, long currNumRows) throws SemanticException {
long newNumRows = 0;
Statistics andStats = null;
@@ -505,7 +492,7 @@ public class StatsRulesProcFactory {
}
private long evaluateBetweenExpr(Statistics stats, ExprNodeDesc pred, long currNumRows, AnnotateStatsProcCtx aspCtx,
- List<String> neededCols, Operator<?> op) throws SemanticException, CloneNotSupportedException {
+ List<String> neededCols, Operator<?> op) throws SemanticException {
final ExprNodeGenericFuncDesc fd = (ExprNodeGenericFuncDesc) pred;
final boolean invert = Boolean.TRUE.equals(
((ExprNodeConstantDesc) fd.getChildren().get(0)).getValue()); // boolean invert (not)
@@ -538,7 +525,7 @@ public class StatsRulesProcFactory {
private long evaluateNotExpr(Statistics stats, ExprNodeDesc pred, long currNumRows,
AnnotateStatsProcCtx aspCtx, List<String> neededCols, Operator<?> op)
- throws CloneNotSupportedException, SemanticException {
+ throws SemanticException {
long numRows = currNumRows;
@@ -837,7 +824,7 @@ public class StatsRulesProcFactory {
private long evaluateChildExpr(Statistics stats, ExprNodeDesc child,
AnnotateStatsProcCtx aspCtx, List<String> neededCols,
- Operator<?> op, long currNumRows) throws CloneNotSupportedException, SemanticException {
+ Operator<?> op, long currNumRows) throws SemanticException {
long numRows = currNumRows;
@@ -1066,210 +1053,206 @@ public class StatsRulesProcFactory {
containsGroupingSet + " sizeOfGroupingSet: " + sizeOfGroupingSet);
}
- try {
- // satisfying precondition means column statistics is available
- if (satisfyPrecondition(parentStats)) {
+ // satisfying precondition means column statistics is available
+ if (satisfyPrecondition(parentStats)) {
- // check if map side aggregation is possible or not based on column stats
- hashAgg = checkMapSideAggregation(gop, colStats, conf);
+ // check if map side aggregation is possible or not based on column stats
+ hashAgg = checkMapSideAggregation(gop, colStats, conf);
- if (LOG.isDebugEnabled()) {
- LOG.debug("STATS-" + gop.toString() + " hashAgg: " + hashAgg);
- }
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("STATS-" + gop.toString() + " hashAgg: " + hashAgg);
+ }
- stats = parentStats.clone();
- stats.setColumnStats(colStats);
- long ndvProduct = 1;
- final long parentNumRows = stats.getNumRows();
+ stats = parentStats.clone();
+ stats.setColumnStats(colStats);
+ long ndvProduct = 1;
+ final long parentNumRows = stats.getNumRows();
- // compute product of distinct values of grouping columns
- for (ColStatistics cs : colStats) {
- if (cs != null) {
- long ndv = cs.getCountDistint();
- if (cs.getNumNulls() > 0) {
- ndv = StatsUtils.safeAdd(ndv, 1);
- }
- ndvProduct = StatsUtils.safeMult(ndvProduct, ndv);
+ // compute product of distinct values of grouping columns
+ for (ColStatistics cs : colStats) {
+ if (cs != null) {
+ long ndv = cs.getCountDistint();
+ if (cs.getNumNulls() > 0) {
+ ndv = StatsUtils.safeAdd(ndv, 1);
+ }
+ ndvProduct = StatsUtils.safeMult(ndvProduct, ndv);
+ } else {
+ if (parentStats.getColumnStatsState().equals(Statistics.State.COMPLETE)) {
+ // the column must be an aggregate column inserted by GBY. We
+ // don't have to account for this column when computing product
+ // of NDVs
+ continue;
} else {
- if (parentStats.getColumnStatsState().equals(Statistics.State.COMPLETE)) {
- // the column must be an aggregate column inserted by GBY. We
- // don't have to account for this column when computing product
- // of NDVs
- continue;
- } else {
- // partial column statistics on grouping attributes case.
- // if column statistics on grouping attribute is missing, then
- // assume worst case.
- // GBY rule will emit half the number of rows if ndvProduct is 0
- ndvProduct = 0;
- }
- break;
+ // partial column statistics on grouping attributes case.
+ // if column statistics on grouping attribute is missing, then
+ // assume worst case.
+ // GBY rule will emit half the number of rows if ndvProduct is 0
+ ndvProduct = 0;
}
+ break;
}
+ }
- // if ndvProduct is 0 then column stats state must be partial and we are missing
- // column stats for a group by column
- if (ndvProduct == 0) {
- ndvProduct = parentNumRows / 2;
+ // if ndvProduct is 0 then column stats state must be partial and we are missing
+ // column stats for a group by column
+ if (ndvProduct == 0) {
+ ndvProduct = parentNumRows / 2;
- if (LOG.isDebugEnabled()) {
- LOG.debug("STATS-" + gop.toString() + ": ndvProduct became 0 as some column does not" +
- " have stats. ndvProduct changed to: " + ndvProduct);
- }
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("STATS-" + gop.toString() + ": ndvProduct became 0 as some column does not" +
+ " have stats. ndvProduct changed to: " + ndvProduct);
}
+ }
- if (interReduction) {
-
- if (hashAgg) {
- if (containsGroupingSet) {
- // Case 4: column stats, hash aggregation, grouping sets
- cardinality = Math.min(
- (StatsUtils.safeMult(parentNumRows, sizeOfGroupingSet)) / 2,
- StatsUtils.safeMult(StatsUtils.safeMult(ndvProduct, parallelism), sizeOfGroupingSet));
+ if (interReduction) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("[Case 4] STATS-" + gop.toString() + ": cardinality: " + cardinality);
- }
- } else {
- // Case 3: column stats, hash aggregation, NO grouping sets
- cardinality = Math.min(parentNumRows / 2, StatsUtils.safeMult(ndvProduct, parallelism));
+ if (hashAgg) {
+ if (containsGroupingSet) {
+ // Case 4: column stats, hash aggregation, grouping sets
+ cardinality = Math.min(
+ (StatsUtils.safeMult(parentNumRows, sizeOfGroupingSet)) / 2,
+ StatsUtils.safeMult(StatsUtils.safeMult(ndvProduct, parallelism), sizeOfGroupingSet));
- if (LOG.isDebugEnabled()) {
- LOG.debug("[Case 3] STATS-" + gop.toString() + ": cardinality: " + cardinality);
- }
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("[Case 4] STATS-" + gop.toString() + ": cardinality: " + cardinality);
}
} else {
- if (containsGroupingSet) {
- // Case 6: column stats, NO hash aggregation, grouping sets
- cardinality = StatsUtils.safeMult(parentNumRows, sizeOfGroupingSet);
+ // Case 3: column stats, hash aggregation, NO grouping sets
+ cardinality = Math.min(parentNumRows / 2, StatsUtils.safeMult(ndvProduct, parallelism));
- if (LOG.isDebugEnabled()) {
- LOG.debug("[Case 6] STATS-" + gop.toString() + ": cardinality: " + cardinality);
- }
- } else {
- // Case 5: column stats, NO hash aggregation, NO grouping sets
- cardinality = parentNumRows;
-
- if (LOG.isDebugEnabled()) {
- LOG.debug("[Case 5] STATS-" + gop.toString() + ": cardinality: " + cardinality);
- }
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("[Case 3] STATS-" + gop.toString() + ": cardinality: " + cardinality);
}
}
} else {
-
- // in reduce side GBY, we don't know if the grouping set was present or not. so get it
- // from map side GBY
- GroupByOperator mGop = OperatorUtils.findSingleOperatorUpstream(parent, GroupByOperator.class);
- if (mGop != null) {
- containsGroupingSet = mGop.getConf().isGroupingSetsPresent();
- }
-
if (containsGroupingSet) {
- // Case 8: column stats, grouping sets
- sizeOfGroupingSet = mGop.getConf().getListGroupingSets().size();
- cardinality = Math.min(parentNumRows, StatsUtils.safeMult(ndvProduct, sizeOfGroupingSet));
+ // Case 6: column stats, NO hash aggregation, grouping sets
+ cardinality = StatsUtils.safeMult(parentNumRows, sizeOfGroupingSet);
if (LOG.isDebugEnabled()) {
- LOG.debug("[Case 8] STATS-" + gop.toString() + ": cardinality: " + cardinality);
+ LOG.debug("[Case 6] STATS-" + gop.toString() + ": cardinality: " + cardinality);
}
} else {
- // Case 9: column stats, NO grouping sets
- cardinality = Math.min(parentNumRows, ndvProduct);
+ // Case 5: column stats, NO hash aggregation, NO grouping sets
+ cardinality = parentNumRows;
if (LOG.isDebugEnabled()) {
- LOG.debug("[Case 9] STATS-" + gop.toString() + ": cardinality: " + cardinality);
+ LOG.debug("[Case 5] STATS-" + gop.toString() + ": cardinality: " + cardinality);
}
}
}
-
- // update stats, but don't update NDV as it will not change
- updateStats(stats, cardinality, true, gop, false);
} else {
- // NO COLUMN STATS
- if (parentStats != null) {
+ // in reduce side GBY, we don't know if the grouping set was present or not. so get it
+ // from map side GBY
+ GroupByOperator mGop = OperatorUtils.findSingleOperatorUpstream(parent, GroupByOperator.class);
+ if (mGop != null) {
+ containsGroupingSet = mGop.getConf().isGroupingSetsPresent();
+ }
+
+ if (containsGroupingSet) {
+ // Case 8: column stats, grouping sets
+ sizeOfGroupingSet = mGop.getConf().getListGroupingSets().size();
+ cardinality = Math.min(parentNumRows, StatsUtils.safeMult(ndvProduct, sizeOfGroupingSet));
- stats = parentStats.clone();
- final long parentNumRows = stats.getNumRows();
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("[Case 8] STATS-" + gop.toString() + ": cardinality: " + cardinality);
+ }
+ } else {
+ // Case 9: column stats, NO grouping sets
+ cardinality = Math.min(parentNumRows, ndvProduct);
- // if we don't have column stats, we just assume hash aggregation is disabled
- if (interReduction) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("[Case 9] STATS-" + gop.toString() + ": cardinality: " + cardinality);
+ }
+ }
+ }
- if (containsGroupingSet) {
- // Case 2: NO column stats, NO hash aggregation, grouping sets
- cardinality = StatsUtils.safeMult(parentNumRows, sizeOfGroupingSet);
+ // update stats, but don't update NDV as it will not change
+ updateStats(stats, cardinality, true, gop, false);
+ } else {
- if (LOG.isDebugEnabled()) {
- LOG.debug("[Case 2] STATS-" + gop.toString() + ": cardinality: " + cardinality);
- }
- } else {
- // Case 1: NO column stats, NO hash aggregation, NO grouping sets
- cardinality = parentNumRows;
+ // NO COLUMN STATS
+ if (parentStats != null) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("[Case 1] STATS-" + gop.toString() + ": cardinality: " + cardinality);
- }
+ stats = parentStats.clone();
+ final long parentNumRows = stats.getNumRows();
+
+ // if we don't have column stats, we just assume hash aggregation is disabled
+ if (interReduction) {
+
+ if (containsGroupingSet) {
+ // Case 2: NO column stats, NO hash aggregation, grouping sets
+ cardinality = StatsUtils.safeMult(parentNumRows, sizeOfGroupingSet);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("[Case 2] STATS-" + gop.toString() + ": cardinality: " + cardinality);
}
} else {
-
- // Case 7: NO column stats
- cardinality = parentNumRows / 2;
+ // Case 1: NO column stats, NO hash aggregation, NO grouping sets
+ cardinality = parentNumRows;
if (LOG.isDebugEnabled()) {
- LOG.debug("[Case 7] STATS-" + gop.toString() + ": cardinality: " + cardinality);
+ LOG.debug("[Case 1] STATS-" + gop.toString() + ": cardinality: " + cardinality);
}
}
+ } else {
- updateStats(stats, cardinality, false, gop);
- }
- }
+ // Case 7: NO column stats
+ cardinality = parentNumRows / 2;
- // if UDAFs are present, new columns needs to be added
- if (!aggDesc.isEmpty() && stats != null) {
- List<ColStatistics> aggColStats = Lists.newArrayList();
- for (ColumnInfo ci : rs.getSignature()) {
-
- // if the columns in row schema is not contained in column
- // expression map, then those are the aggregate columns that
- // are added GBY operator. we will estimate the column statistics
- // for those newly added columns
- if (!colExprMap.containsKey(ci.getInternalName())) {
- String colName = ci.getInternalName();
- String colType = ci.getTypeName();
- ColStatistics cs = new ColStatistics(colName, colType);
- cs.setCountDistint(stats.getNumRows());
- cs.setNumNulls(0);
- cs.setAvgColLen(StatsUtils.getAvgColLenOf(conf, ci.getObjectInspector(), colType));
- aggColStats.add(cs);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("[Case 7] STATS-" + gop.toString() + ": cardinality: " + cardinality);
}
}
- // add the new aggregate column and recompute data size
- if (aggColStats.size() > 0) {
- stats.addToColumnStats(aggColStats);
+ updateStats(stats, cardinality, false, gop);
+ }
+ }
- // only if the column stats is available, update the data size from
- // the column stats
- if (!stats.getColumnStatsState().equals(Statistics.State.NONE)) {
- updateStats(stats, stats.getNumRows(), true, gop);
- }
- }
+ // if UDAFs are present, new columns needs to be added
+ if (!aggDesc.isEmpty() && stats != null) {
+ List<ColStatistics> aggColStats = Lists.newArrayList();
+ for (ColumnInfo ci : rs.getSignature()) {
- // if UDAF present and if column expression map is empty then it must
- // be full aggregation query like count(*) in which case number of
- // rows will be 1
- if (colExprMap.isEmpty()) {
- updateStats(stats, 1, true, gop);
+ // if the columns in row schema is not contained in column
+ // expression map, then those are the aggregate columns that
+ // are added GBY operator. we will estimate the column statistics
+ // for those newly added columns
+ if (!colExprMap.containsKey(ci.getInternalName())) {
+ String colName = ci.getInternalName();
+ String colType = ci.getTypeName();
+ ColStatistics cs = new ColStatistics(colName, colType);
+ cs.setCountDistint(stats.getNumRows());
+ cs.setNumNulls(0);
+ cs.setAvgColLen(StatsUtils.getAvgColLenOf(conf, ci.getObjectInspector(), colType));
+ aggColStats.add(cs);
}
}
- gop.setStatistics(stats);
+ // add the new aggregate column and recompute data size
+ if (aggColStats.size() > 0) {
+ stats.addToColumnStats(aggColStats);
+
+ // only if the column stats is available, update the data size from
+ // the column stats
+ if (!stats.getColumnStatsState().equals(Statistics.State.NONE)) {
+ updateStats(stats, stats.getNumRows(), true, gop);
+ }
+ }
- if (LOG.isDebugEnabled() && stats != null) {
- LOG.debug("[0] STATS-" + gop.toString() + ": " + stats.extendedToString());
+ // if UDAF present and if column expression map is empty then it must
+ // be full aggregation query like count(*) in which case number of
+ // rows will be 1
+ if (colExprMap.isEmpty()) {
+ updateStats(stats, 1, true, gop);
}
- } catch (CloneNotSupportedException e) {
- throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
+ }
+
+ gop.setStatistics(stats);
+
+ if (LOG.isDebugEnabled() && stats != null) {
+ LOG.debug("[0] STATS-" + gop.toString() + ": " + stats.extendedToString());
}
return null;
}
@@ -1470,11 +1453,7 @@ public class StatsRulesProcFactory {
for (int pos = 0; pos < parents.size(); pos++) {
ReduceSinkOperator parent = (ReduceSinkOperator) jop.getParentOperators().get(pos);
Statistics parentStats;
- try {
- parentStats = parent.getStatistics().clone();
- } catch (CloneNotSupportedException e) {
- throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
- }
+ parentStats = parent.getStatistics().clone();
keyExprs = StatsUtils.getQualifedReducerKeyNames(parent.getConf()
.getOutputKeyColumnNames());
@@ -1581,12 +1560,8 @@ public class StatsRulesProcFactory {
pred = jop.getConf().getResidualFilterExprs().get(0);
}
// evaluate filter expression and update statistics
- try {
- newNumRows = evaluateExpression(stats, pred,
- aspCtx, jop.getSchema().getColumnNames(), jop, stats.getNumRows());
- } catch (CloneNotSupportedException e) {
- throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
- }
+ newNumRows = evaluateExpression(stats, pred,
+ aspCtx, jop.getSchema().getColumnNames(), jop, stats.getNumRows());
// update statistics based on column statistics.
// OR conditions keeps adding the stats independently, this may
// result in number of rows getting more than the input rows in
@@ -1677,12 +1652,8 @@ public class StatsRulesProcFactory {
pred = jop.getConf().getResidualFilterExprs().get(0);
}
// evaluate filter expression and update statistics
- try {
newNumRows = evaluateExpression(wcStats, pred,
aspCtx, jop.getSchema().getColumnNames(), jop, wcStats.getNumRows());
- } catch (CloneNotSupportedException e) {
- throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
- }
// update only the basic statistics in the absence of column statistics
if (newNumRows <= joinRowCount) {
updateStats(wcStats, newNumRows, false, jop);
@@ -2242,42 +2213,37 @@ public class StatsRulesProcFactory {
LimitOperator lop = (LimitOperator) nd;
Operator<? extends OperatorDesc> parent = lop.getParentOperators().get(0);
Statistics parentStats = parent.getStatistics();
+ long limit = -1;
+ limit = lop.getConf().getLimit();
- try {
- long limit = -1;
- limit = lop.getConf().getLimit();
+ if (satisfyPrecondition(parentStats)) {
+ Statistics stats = parentStats.clone();
+ List<ColStatistics> colStats = StatsUtils.getColStatisticsUpdatingTableAlias(
+ parentStats, lop.getSchema());
+ stats.setColumnStats(colStats);
- if (satisfyPrecondition(parentStats)) {
- Statistics stats = parentStats.clone();
- List<ColStatistics> colStats = StatsUtils.getColStatisticsUpdatingTableAlias(
- parentStats, lop.getSchema());
- stats.setColumnStats(colStats);
-
- // if limit is greater than available rows then do not update
- // statistics
- if (limit <= parentStats.getNumRows()) {
- updateStats(stats, limit, true, lop);
- }
- lop.setStatistics(stats);
+ // if limit is greater than available rows then do not update
+ // statistics
+ if (limit <= parentStats.getNumRows()) {
+ updateStats(stats, limit, true, lop);
+ }
+ lop.setStatistics(stats);
- if (LOG.isDebugEnabled()) {
- LOG.debug("[0] STATS-" + lop.toString() + ": " + stats.extendedToString());
- }
- } else {
- if (parentStats != null) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("[0] STATS-" + lop.toString() + ": " + stats.extendedToString());
+ }
+ } else {
+ if (parentStats != null) {
- // in the absence of column statistics, compute data size based on
- // based on average row size
- limit = StatsUtils.getMaxIfOverflow(limit);
- Statistics wcStats = parentStats.scaleToRowCount(limit);
- lop.setStatistics(wcStats);
- if (LOG.isDebugEnabled()) {
- LOG.debug("[1] STATS-" + lop.toString() + ": " + wcStats.extendedToString());
- }
+ // in the absence of column statistics, compute data size based on
+ // based on average row size
+ limit = StatsUtils.getMaxIfOverflow(limit);
+ Statistics wcStats = parentStats.scaleToRowCount(limit);
+ lop.setStatistics(wcStats);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("[1] STATS-" + lop.toString() + ": " + wcStats.extendedToString());
}
}
- } catch (CloneNotSupportedException e) {
- throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
}
return null;
}
@@ -2302,48 +2268,43 @@ public class StatsRulesProcFactory {
if (parentStats != null) {
AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx;
HiveConf conf = aspCtx.getConf();
-
List<String> outKeyColNames = rop.getConf().getOutputKeyColumnNames();
List<String> outValueColNames = rop.getConf().getOutputValueColumnNames();
Map<String, ExprNodeDesc> colExprMap = rop.getColumnExprMap();
- try {
- Statistics outStats = parentStats.clone();
- if (satisfyPrecondition(parentStats)) {
- List<ColStatistics> colStats = Lists.newArrayList();
- for (String key : outKeyColNames) {
- String prefixedKey = Utilities.ReduceField.KEY.toString() + "." + key;
- ExprNodeDesc end = colExprMap.get(prefixedKey);
- if (end != null) {
- ColStatistics cs = StatsUtils
- .getColStatisticsFromExpression(conf, parentStats, end);
- if (cs != null) {
- cs.setColumnName(prefixedKey);
- colStats.add(cs);
- }
+ Statistics outStats = parentStats.clone();
+ if (satisfyPrecondition(parentStats)) {
+ List<ColStatistics> colStats = Lists.newArrayList();
+ for (String key : outKeyColNames) {
+ String prefixedKey = Utilities.ReduceField.KEY.toString() + "." + key;
+ ExprNodeDesc end = colExprMap.get(prefixedKey);
+ if (end != null) {
+ ColStatistics cs = StatsUtils
+ .getColStatisticsFromExpression(conf, parentStats, end);
+ if (cs != null) {
+ cs.setColumnName(prefixedKey);
+ colStats.add(cs);
}
}
+ }
- for (String val : outValueColNames) {
- String prefixedVal = Utilities.ReduceField.VALUE.toString() + "." + val;
- ExprNodeDesc end = colExprMap.get(prefixedVal);
- if (end != null) {
- ColStatistics cs = StatsUtils
- .getColStatisticsFromExpression(conf, parentStats, end);
- if (cs != null) {
- cs.setColumnName(prefixedVal);
- colStats.add(cs);
- }
+ for (String val : outValueColNames) {
+ String prefixedVal = Utilities.ReduceField.VALUE.toString() + "." + val;
+ ExprNodeDesc end = colExprMap.get(prefixedVal);
+ if (end != null) {
+ ColStatistics cs = StatsUtils
+ .getColStatisticsFromExpression(conf, parentStats, end);
+ if (cs != null) {
+ cs.setColumnName(prefixedVal);
+ colStats.add(cs);
}
}
-
- outStats.setColumnStats(colStats);
- }
- rop.setStatistics(outStats);
- if (LOG.isDebugEnabled()) {
- LOG.debug("[0] STATS-" + rop.toString() + ": " + outStats.extendedToString());
}
- } catch (CloneNotSupportedException e) {
- throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
+
+ outStats.setColumnStats(colStats);
+ }
+ rop.setStatistics(outStats);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("[0] STATS-" + rop.toString() + ": " + outStats.extendedToString());
}
}
return null;
@@ -2376,11 +2337,7 @@ public class StatsRulesProcFactory {
Statistics parentStats = parent.getStatistics();
if (stats == null) {
- try {
- stats = parentStats.clone();
- } catch (CloneNotSupportedException e) {
- throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
- }
+ stats = parentStats.clone();
} else {
stats.addBasicStats(parentStats);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/63f2ec19/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
index 1aafa9e..aa0559d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
@@ -140,7 +140,7 @@ public class ColStatistics {
}
@Override
- public ColStatistics clone() throws CloneNotSupportedException {
+ public ColStatistics clone() {
ColStatistics clone = new ColStatistics(colName, colType);
clone.setAvgColLen(avgColLen);
clone.setCountDistint(countDistint);
http://git-wip-us.apache.org/repos/asf/hive/blob/63f2ec19/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
index 82df960..013fccc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
@@ -167,7 +167,7 @@ public class Statistics implements Serializable {
}
@Override
- public Statistics clone() throws CloneNotSupportedException {
+ public Statistics clone() {
Statistics clone = new Statistics(numRows, dataSize);
clone.setRunTimeNumRows(runTimeNumRows);
clone.setBasicStatsState(basicStatsState);
@@ -302,12 +302,7 @@ public class Statistics implements Serializable {
public Statistics scaleToRowCount(long newRowCount) {
Statistics ret;
- try {
- ret = clone();
- } catch (CloneNotSupportedException e) {
- // FIXME: remove the Colneable usage
- return new Statistics(0,0);
- }
+ ret = clone();
if(numRows == 0 || newRowCount >= numRows) {
return ret;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/63f2ec19/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 20c2f94..e42614c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -1556,11 +1556,7 @@ public class StatsUtils {
for (ColStatistics parentColStat : parentStats.getColumnStats()) {
ColStatistics colStat;
- try {
- colStat = parentColStat.clone();
- } catch (CloneNotSupportedException e) {
- colStat = null;
- }
+ colStat = parentColStat.clone();
if (colStat != null) {
cs.add(colStat);
}
@@ -1604,11 +1600,7 @@ public class StatsUtils {
ColStatistics colStats = parentStats.getColumnStatisticsFromColName(colName);
if (colStats != null) {
/* If statistics for the column already exist use it. */
- try {
return colStats.clone();
- } catch (CloneNotSupportedException e) {
- return null;
- }
}
// virtual columns
@@ -1619,11 +1611,7 @@ public class StatsUtils {
// clone the column stats and return
ColStatistics result = parentStats.getColumnStatisticsFromColName(colName);
if (result != null) {
- try {
return result.clone();
- } catch (CloneNotSupportedException e) {
- return null;
- }
}
return null;
}
@@ -1651,12 +1639,7 @@ public class StatsUtils {
ColStatistics stats = parentStats.getColumnStatisticsFromColName(engfd.getCols().get(0));
if (stats != null) {
ColStatistics newStats;
- try {
- newStats = stats.clone();
- } catch (CloneNotSupportedException e) {
- LOG.warn("error cloning stats, this should not happen");
- return null;
- }
+ newStats = stats.clone();
newStats.setColumnName(colName);
colType = colType.toLowerCase();
newStats.setColumnType(colType);
[4/4] hive git commit: HIVE-18005: Improve size estimation for
array() to be not 0 (Zoltan Haindrich, reviewed by Vineet Garg)
Posted by kg...@apache.org.
HIVE-18005: Improve size estimation for array() to be not 0 (Zoltan Haindrich, reviewed by Vineet Garg)
Signed-off-by: Zoltan Haindrich <ki...@rxd.hu>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f6312418
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f6312418
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f6312418
Branch: refs/heads/master
Commit: f63124188ef0965d85ed0af315cab840d1e9af3f
Parents: 63f2ec1
Author: Zoltan Haindrich <ki...@rxd.hu>
Authored: Tue Dec 5 10:48:38 2017 +0100
Committer: Zoltan Haindrich <ki...@rxd.hu>
Committed: Tue Dec 5 10:48:38 2017 +0100
----------------------------------------------------------------------
.../apache/hadoop/hive/ql/stats/StatsUtils.java | 21 +--
.../hadoop/hive/ql/stats/TestStatsUtils.java | 35 ++++-
.../clientpositive/array_size_estimation.q | 16 ++
.../clientpositive/array_size_estimation.q.out | 155 +++++++++++++++++++
.../beeline/select_dummy_source.q.out | 14 +-
.../clientpositive/lateral_view_onview.q.out | 40 ++---
.../clientpositive/lateral_view_onview2.q.out | 40 ++---
.../clientpositive/select_dummy_source.q.out | 14 +-
.../test/results/clientpositive/udf_array.q.out | 2 +-
.../results/clientpositive/udf_sort_array.q.out | 2 +-
.../test/results/clientpositive/udf_split.q.out | 2 +-
.../hive/common/type/HiveIntervalDayTime.java | 4 +-
.../hadoop/hive/ql/util/JavaDataModel.java | 2 +-
13 files changed, 275 insertions(+), 72 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/f6312418/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index e42614c..05c9380 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -1234,10 +1234,6 @@ public class StatsUtils {
/**
* Get the size of complex data types
- * @param conf
- * - hive conf
- * @param oi
- * - object inspector
* @return raw data size
*/
public static long getSizeOfComplexTypes(HiveConf conf, ObjectInspector oi) {
@@ -1271,7 +1267,7 @@ public class StatsUtils {
// check if list elements are primitive or Objects
ObjectInspector leoi = scloi.getListElementObjectInspector();
if (leoi.getCategory().equals(ObjectInspector.Category.PRIMITIVE)) {
- result += getSizeOfPrimitiveTypeArraysFromType(leoi.getTypeName(), length);
+ result += getSizeOfPrimitiveTypeArraysFromType(leoi.getTypeName(), length, conf);
} else {
result += JavaDataModel.get().lengthForObjectArrayOfSize(length);
}
@@ -1373,13 +1369,9 @@ public class StatsUtils {
/**
* Get the size of arrays of primitive types
- * @param colType
- * - column type
- * @param length
- * - array length
* @return raw data size
*/
- public static long getSizeOfPrimitiveTypeArraysFromType(String colType, int length) {
+ public static long getSizeOfPrimitiveTypeArraysFromType(String colType, int length, HiveConf conf) {
String colTypeLowerCase = colType.toLowerCase();
if (colTypeLowerCase.equals(serdeConstants.TINYINT_TYPE_NAME)
|| colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)
@@ -1396,12 +1388,21 @@ public class StatsUtils {
} else if (colTypeLowerCase.equals(serdeConstants.BOOLEAN_TYPE_NAME)) {
return JavaDataModel.get().lengthForBooleanArrayOfSize(length);
} else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME) ||
+ colTypeLowerCase.equals(serdeConstants.DATETIME_TYPE_NAME) ||
+ colTypeLowerCase.equals(serdeConstants.INTERVAL_YEAR_MONTH_TYPE_NAME) ||
+ colTypeLowerCase.equals(serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME) ||
colTypeLowerCase.equals(serdeConstants.TIMESTAMPLOCALTZ_TYPE_NAME)) {
return JavaDataModel.get().lengthForTimestampArrayOfSize(length);
} else if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
return JavaDataModel.get().lengthForDateArrayOfSize(length);
} else if (colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
return JavaDataModel.get().lengthForDecimalArrayOfSize(length);
+ } else if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME)) {
+ int configVarLen = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_STATS_MAX_VARIABLE_LENGTH);
+ int siz = JavaDataModel.get().lengthForStringOfLength(configVarLen);
+ return JavaDataModel.get().lengthForPrimitiveArrayOfSize(siz, length);
} else {
return 0;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/f6312418/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java b/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java
index eee9a31..9699bcc 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java
@@ -18,11 +18,20 @@
package org.apache.hadoop.hive.ql.stats;
+import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
+import java.lang.reflect.Field;
+import java.lang.reflect.Modifier;
+import java.util.Set;
+
+import org.apache.commons.lang.reflect.FieldUtils;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.plan.ColStatistics.Range;
+import org.apache.hadoop.hive.serde.serdeConstants;
import org.junit.Test;
+import org.spark_project.guava.collect.Sets;
public class TestStatsUtils {
@@ -47,7 +56,6 @@ public class TestStatsUtils {
checkCombinedRange(false, new Range(11, 12), new Range(0, 10));
}
-
private void checkCombinedRange(boolean valid, Range r1, Range r2) {
Range r3a = StatsUtils.combineRange(r1, r2);
Range r3b = StatsUtils.combineRange(r2, r1);
@@ -67,5 +75,30 @@ public class TestStatsUtils {
return m <= v && v <= M;
}
+ @Test
+ public void testPrimitiveSizeEstimations() throws Exception {
+ HiveConf conf = new HiveConf();
+ Set<String> exclusions = Sets.newHashSet();
+ exclusions.add(serdeConstants.VOID_TYPE_NAME);
+ exclusions.add(serdeConstants.LIST_TYPE_NAME);
+ exclusions.add(serdeConstants.MAP_TYPE_NAME);
+ exclusions.add(serdeConstants.STRUCT_TYPE_NAME);
+ exclusions.add(serdeConstants.UNION_TYPE_NAME);
+ Field[] serdeFields = serdeConstants.class.getFields();
+ for (Field field : serdeFields) {
+ if (!Modifier.isStatic(field.getModifiers())) {
+ continue;
+ }
+ if (!field.getName().endsWith("_TYPE_NAME")) {
+ continue;
+ }
+ String typeName = (String) FieldUtils.readStaticField(field);
+ if (exclusions.contains(typeName)) {
+ continue;
+ }
+ long siz = StatsUtils.getSizeOfPrimitiveTypeArraysFromType(typeName, 3, conf);
+ assertNotEquals(field.toString(), 0, siz);
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/f6312418/ql/src/test/queries/clientpositive/array_size_estimation.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/array_size_estimation.q b/ql/src/test/queries/clientpositive/array_size_estimation.q
new file mode 100644
index 0000000..74713c4
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/array_size_estimation.q
@@ -0,0 +1,16 @@
+set hive.stats.fetch.column.stats=true;
+
+create table t (col string);
+insert into t values ('x');
+
+explain
+select array("b", "d", "c", "a") FROM t;
+
+explain
+select array("b", "d", "c", col) FROM t;
+
+explain
+select sort_array(array("b", "d", "c", "a")),array("1","2") FROM t;
+
+explain
+select sort_array(array("b", "d", "c", col)),array("1","2") FROM t;
http://git-wip-us.apache.org/repos/asf/hive/blob/f6312418/ql/src/test/results/clientpositive/array_size_estimation.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/array_size_estimation.q.out b/ql/src/test/results/clientpositive/array_size_estimation.q.out
new file mode 100644
index 0000000..3cd205f
--- /dev/null
+++ b/ql/src/test/results/clientpositive/array_size_estimation.q.out
@@ -0,0 +1,155 @@
+PREHOOK: query: create table t (col string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t
+POSTHOOK: query: create table t (col string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t
+PREHOOK: query: insert into t values ('x')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values ('x')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.col SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: explain
+select array("b", "d", "c", "a") FROM t
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select array("b", "d", "c", "a") FROM t
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: t
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: array('b','d','c','a') (type: array<string>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 776 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 776 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+select array("b", "d", "c", col) FROM t
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select array("b", "d", "c", col) FROM t
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: t
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: array('b','d','c',col) (type: array<string>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+select sort_array(array("b", "d", "c", "a")),array("1","2") FROM t
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select sort_array(array("b", "d", "c", "a")),array("1","2") FROM t
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: t
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: sort_array(array('b','d','c','a')) (type: array<string>), array('1','2') (type: array<string>)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 1184 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1184 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+select sort_array(array("b", "d", "c", col)),array("1","2") FROM t
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select sort_array(array("b", "d", "c", col)),array("1","2") FROM t
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: t
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: sort_array(array('b','d','c',col)) (type: array<string>), array('1','2') (type: array<string>)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
http://git-wip-us.apache.org/repos/asf/hive/blob/f6312418/ql/src/test/results/clientpositive/beeline/select_dummy_source.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/beeline/select_dummy_source.q.out b/ql/src/test/results/clientpositive/beeline/select_dummy_source.q.out
index 0b73e84..b3ca623 100644
--- a/ql/src/test/results/clientpositive/beeline/select_dummy_source.q.out
+++ b/ql/src/test/results/clientpositive/beeline/select_dummy_source.q.out
@@ -85,17 +85,17 @@ STAGE PLANS:
Select Operator
expressions: array('a','b') (type: array<string>)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE
UDTF Operator
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE
function name: explode
Select Operator
expressions: col (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -204,14 +204,14 @@ STAGE PLANS:
Select Operator
expressions: array('a','b') (type: array<string>)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE
UDTF Operator
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE
function name: explode
Select Operator
expressions: col (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
ListSink
PREHOOK: query: select explode(array('a', 'b'))
http://git-wip-us.apache.org/repos/asf/hive/blob/f6312418/ql/src/test/results/clientpositive/lateral_view_onview.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/lateral_view_onview.q.out b/ql/src/test/results/clientpositive/lateral_view_onview.q.out
index 8bd36e2..f09b0bd 100644
--- a/ql/src/test/results/clientpositive/lateral_view_onview.q.out
+++ b/ql/src/test/results/clientpositive/lateral_view_onview.q.out
@@ -231,17 +231,17 @@ STAGE PLANS:
Statistics: Num rows: 1000 Data size: 56000 Basic stats: COMPLETE Column stats: COMPLETE
Lateral View Join Operator
outputColumnNames: _col4, _col5
- Statistics: Num rows: 2000 Data size: 56000 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 648000 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col4 (type: int), _col5 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2000 Data size: 56000 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 16000 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 9
- Statistics: Num rows: 9 Data size: 252 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 9 Data size: 252 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -249,23 +249,23 @@ STAGE PLANS:
Select Operator
expressions: array('a','b','c') (type: array<string>)
outputColumnNames: _col0
- Statistics: Num rows: 1000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 592000 Basic stats: COMPLETE Column stats: COMPLETE
UDTF Operator
- Statistics: Num rows: 1000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 592000 Basic stats: COMPLETE Column stats: COMPLETE
function name: explode
Lateral View Join Operator
outputColumnNames: _col4, _col5
- Statistics: Num rows: 2000 Data size: 56000 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 648000 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col4 (type: int), _col5 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2000 Data size: 56000 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 16000 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 9
- Statistics: Num rows: 9 Data size: 252 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 9 Data size: 252 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -288,17 +288,17 @@ STAGE PLANS:
Statistics: Num rows: 1000 Data size: 56000 Basic stats: COMPLETE Column stats: COMPLETE
Lateral View Join Operator
outputColumnNames: _col4, _col5
- Statistics: Num rows: 2000 Data size: 56000 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 648000 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col4 (type: int), _col5 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2000 Data size: 56000 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 16000 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 9
- Statistics: Num rows: 9 Data size: 252 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 9 Data size: 252 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -306,23 +306,23 @@ STAGE PLANS:
Select Operator
expressions: array('a','b','c') (type: array<string>)
outputColumnNames: _col0
- Statistics: Num rows: 1000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 592000 Basic stats: COMPLETE Column stats: COMPLETE
UDTF Operator
- Statistics: Num rows: 1000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 592000 Basic stats: COMPLETE Column stats: COMPLETE
function name: explode
Lateral View Join Operator
outputColumnNames: _col4, _col5
- Statistics: Num rows: 2000 Data size: 56000 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 648000 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col4 (type: int), _col5 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2000 Data size: 56000 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 16000 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 9
- Statistics: Num rows: 9 Data size: 252 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 9 Data size: 252 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/f6312418/ql/src/test/results/clientpositive/lateral_view_onview2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/lateral_view_onview2.q.out b/ql/src/test/results/clientpositive/lateral_view_onview2.q.out
index 16813e0..aec90de 100644
--- a/ql/src/test/results/clientpositive/lateral_view_onview2.q.out
+++ b/ql/src/test/results/clientpositive/lateral_view_onview2.q.out
@@ -64,17 +64,17 @@ STAGE PLANS:
Statistics: Num rows: 1000 Data size: 56000 Basic stats: COMPLETE Column stats: COMPLETE
Lateral View Join Operator
outputColumnNames: _col4, _col5
- Statistics: Num rows: 2000 Data size: 56000 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 648000 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col4 (type: int), _col5 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2000 Data size: 56000 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 16000 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 9
- Statistics: Num rows: 9 Data size: 252 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 9 Data size: 252 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -82,23 +82,23 @@ STAGE PLANS:
Select Operator
expressions: array('a','b','c') (type: array<string>)
outputColumnNames: _col0
- Statistics: Num rows: 1000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 592000 Basic stats: COMPLETE Column stats: COMPLETE
UDTF Operator
- Statistics: Num rows: 1000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 592000 Basic stats: COMPLETE Column stats: COMPLETE
function name: explode
Lateral View Join Operator
outputColumnNames: _col4, _col5
- Statistics: Num rows: 2000 Data size: 56000 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 648000 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col4 (type: int), _col5 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2000 Data size: 56000 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 16000 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 9
- Statistics: Num rows: 9 Data size: 252 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 9 Data size: 252 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -121,17 +121,17 @@ STAGE PLANS:
Statistics: Num rows: 1000 Data size: 56000 Basic stats: COMPLETE Column stats: COMPLETE
Lateral View Join Operator
outputColumnNames: _col4, _col5
- Statistics: Num rows: 2000 Data size: 56000 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 648000 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col4 (type: int), _col5 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2000 Data size: 56000 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 16000 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 9
- Statistics: Num rows: 9 Data size: 252 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 9 Data size: 252 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -139,23 +139,23 @@ STAGE PLANS:
Select Operator
expressions: array('a','b','c') (type: array<string>)
outputColumnNames: _col0
- Statistics: Num rows: 1000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 592000 Basic stats: COMPLETE Column stats: COMPLETE
UDTF Operator
- Statistics: Num rows: 1000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 592000 Basic stats: COMPLETE Column stats: COMPLETE
function name: explode
Lateral View Join Operator
outputColumnNames: _col4, _col5
- Statistics: Num rows: 2000 Data size: 56000 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 648000 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col4 (type: int), _col5 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2000 Data size: 56000 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 16000 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 9
- Statistics: Num rows: 9 Data size: 252 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 9 Data size: 252 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/f6312418/ql/src/test/results/clientpositive/select_dummy_source.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/select_dummy_source.q.out b/ql/src/test/results/clientpositive/select_dummy_source.q.out
index 0b73e84..b3ca623 100644
--- a/ql/src/test/results/clientpositive/select_dummy_source.q.out
+++ b/ql/src/test/results/clientpositive/select_dummy_source.q.out
@@ -85,17 +85,17 @@ STAGE PLANS:
Select Operator
expressions: array('a','b') (type: array<string>)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE
UDTF Operator
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE
function name: explode
Select Operator
expressions: col (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -204,14 +204,14 @@ STAGE PLANS:
Select Operator
expressions: array('a','b') (type: array<string>)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE
UDTF Operator
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE
function name: explode
Select Operator
expressions: col (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
ListSink
PREHOOK: query: select explode(array('a', 'b'))
http://git-wip-us.apache.org/repos/asf/hive/blob/f6312418/ql/src/test/results/clientpositive/udf_array.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/udf_array.q.out b/ql/src/test/results/clientpositive/udf_array.q.out
index 1abb399..16aedb2 100644
--- a/ql/src/test/results/clientpositive/udf_array.q.out
+++ b/ql/src/test/results/clientpositive/udf_array.q.out
@@ -31,7 +31,7 @@ STAGE PLANS:
Select Operator
expressions: array() (type: array<string>), array()[1] (type: string), array(1,2,3) (type: array<int>), array(1,2,3)[2] (type: int), array(1,'a',2,3) (type: array<string>), array(1,'a',2,3)[2] (type: string), array(array(1),array(2),array(3),array(4))[1][0] (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 500 Data size: 216000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 624000 Basic stats: COMPLETE Column stats: COMPLETE
ListSink
PREHOOK: query: SELECT array(), array()[1], array(1, 2, 3), array(1, 2, 3)[2], array(1,"a", 2, 3), array(1,"a", 2, 3)[2],
http://git-wip-us.apache.org/repos/asf/hive/blob/f6312418/ql/src/test/results/clientpositive/udf_sort_array.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/udf_sort_array.q.out b/ql/src/test/results/clientpositive/udf_sort_array.q.out
index 1e9dc85..f375e85 100644
--- a/ql/src/test/results/clientpositive/udf_sort_array.q.out
+++ b/ql/src/test/results/clientpositive/udf_sort_array.q.out
@@ -40,7 +40,7 @@ STAGE PLANS:
Select Operator
expressions: sort_array(array('b','d','c','a')) (type: array<string>)
outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 388000 Basic stats: COMPLETE Column stats: COMPLETE
ListSink
PREHOOK: query: SELECT sort_array(array("f", "a", "g", "c", "b", "d", "e")) FROM src tablesample (1 rows)
http://git-wip-us.apache.org/repos/asf/hive/blob/f6312418/ql/src/test/results/clientpositive/udf_split.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/udf_split.q.out b/ql/src/test/results/clientpositive/udf_split.q.out
index d62cc61..c05a8dd 100644
--- a/ql/src/test/results/clientpositive/udf_split.q.out
+++ b/ql/src/test/results/clientpositive/udf_split.q.out
@@ -42,7 +42,7 @@ STAGE PLANS:
Select Operator
expressions: split('a b c', ' ') (type: array<string>), split('oneAtwoBthreeC', '[ABC]') (type: array<string>), split('', '.') (type: array<string>), split(50401020, 0) (type: array<string>)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 1276000 Basic stats: COMPLETE Column stats: COMPLETE
ListSink
PREHOOK: query: SELECT
http://git-wip-us.apache.org/repos/asf/hive/blob/f6312418/storage-api/src/java/org/apache/hadoop/hive/common/type/HiveIntervalDayTime.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/common/type/HiveIntervalDayTime.java b/storage-api/src/java/org/apache/hadoop/hive/common/type/HiveIntervalDayTime.java
index b891e27..cb1306e 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/common/type/HiveIntervalDayTime.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/common/type/HiveIntervalDayTime.java
@@ -18,8 +18,6 @@
package org.apache.hadoop.hive.common.type;
import java.math.BigDecimal;
-import java.sql.Timestamp;
-import java.util.Date;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -27,7 +25,6 @@ import java.util.regex.Pattern;
import org.apache.commons.lang.builder.HashCodeBuilder;
import org.apache.hive.common.util.IntervalDayTimeUtils;
-import sun.util.calendar.BaseCalendar;
/**
* Day-time interval type representing an offset in days/hours/minutes/seconds,
@@ -170,6 +167,7 @@ public class HiveIntervalDayTime implements Comparable<HiveIntervalDayTime> {
/**
* Return a copy of this object.
*/
+ @Override
public Object clone() {
return new HiveIntervalDayTime(totalSeconds, nanos);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/f6312418/storage-api/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java b/storage-api/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java
index df952cb..68ea6db 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java
@@ -282,7 +282,7 @@ public enum JavaDataModel {
return ((size + 8) >> 3) << 3;
}
- private long lengthForPrimitiveArrayOfSize(int primitiveSize, long length) {
+ public long lengthForPrimitiveArrayOfSize(int primitiveSize, long length) {
return alignUp(array() + primitiveSize*length, memoryAlign());
}
[2/4] hive git commit: HIVE-18187: Add jamon generated-sources as
source folder (Bertalan Kondrat via Zoltan Haindrich)
Posted by kg...@apache.org.
HIVE-18187: Add jamon generated-sources as source folder (Bertalan Kondrat via Zoltan Haindrich)
Signed-off-by: Zoltan Haindrich <ki...@rxd.hu>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c6730411
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c6730411
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c6730411
Branch: refs/heads/master
Commit: c673041127468e7a0e3a3442b5ee27f624542c5d
Parents: 8ac91e7
Author: Bertalan Kondrat <kb...@gmail.com>
Authored: Tue Dec 5 10:43:47 2017 +0100
Committer: Zoltan Haindrich <ki...@rxd.hu>
Committed: Tue Dec 5 10:43:47 2017 +0100
----------------------------------------------------------------------
service/pom.xml | 1 +
1 file changed, 1 insertion(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/c6730411/service/pom.xml
----------------------------------------------------------------------
diff --git a/service/pom.xml b/service/pom.xml
index 412bde5..6e5ce8e 100644
--- a/service/pom.xml
+++ b/service/pom.xml
@@ -319,6 +319,7 @@
<configuration>
<sources>
<source>${project.build.directory}/generated-sources/java</source>
+ <source>${project.build.directory}/generated-jamon</source>
</sources>
</configuration>
</execution>