You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by pr...@apache.org on 2014/09/07 22:19:37 UTC
svn commit: r1623195 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
test/results/clientpositive/union20.q.out
Author: prasanthj
Date: Sun Sep 7 20:19:36 2014
New Revision: 1623195
URL: http://svn.apache.org/r1623195
Log:
HIVE-7991: Incorrect calculation of number of rows in JoinStatsRule.process results in overflow (Prasanth J, reviewed by Gunther Hagleitner)
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
hive/trunk/ql/src/test/results/clientpositive/union20.q.out
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java?rev=1623195&r1=1623194&r2=1623195&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java Sun Sep 7 20:19:36 2014
@@ -840,6 +840,7 @@ public class StatsRulesProcFactory {
Map<String, ColStatistics> joinedColStats = Maps.newHashMap();
Map<Integer, List<String>> joinKeys = Maps.newHashMap();
+ List<Long> rowCounts = Lists.newArrayList();
// get the join keys from parent ReduceSink operators
for (int pos = 0; pos < parents.size(); pos++) {
@@ -859,6 +860,7 @@ public class StatsRulesProcFactory {
for (String tabAlias : tableAliases) {
rowCountParents.put(tabAlias, parentStats.getNumRows());
}
+ rowCounts.add(parentStats.getNumRows());
// multi-attribute join key
if (keyExprs.size() > 1) {
@@ -959,8 +961,7 @@ public class StatsRulesProcFactory {
// update join statistics
stats.setColumnStats(outColStats);
- long newRowCount = computeNewRowCount(
- Lists.newArrayList(rowCountParents.values()), denom);
+ long newRowCount = computeNewRowCount(rowCounts, denom);
updateStatsForJoinType(stats, newRowCount, jop, rowCountParents,
outInTabAlias);
Modified: hive/trunk/ql/src/test/results/clientpositive/union20.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/union20.q.out?rev=1623195&r1=1623194&r2=1623195&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/union20.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/union20.q.out Sun Sep 7 20:19:36 2014
@@ -132,14 +132,14 @@ STAGE PLANS:
0 {KEY.reducesinkkey0} {VALUE._col0}
1 {KEY.reducesinkkey0} {VALUE._col0}
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 6 Data size: 1632 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 36 Data size: 9792 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 6 Data size: 1632 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 36 Data size: 9792 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
compressed: false
- Statistics: Num rows: 6 Data size: 1632 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 36 Data size: 9792 Basic stats: COMPLETE Column stats: PARTIAL
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat