You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain text version.
Posted to commits@hive.apache.org by pr...@apache.org on 2014/09/07 22:19:37 UTC

svn commit: r1623195 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java test/results/clientpositive/union20.q.out

Author: prasanthj
Date: Sun Sep  7 20:19:36 2014
New Revision: 1623195

URL: http://svn.apache.org/r1623195
Log:
HIVE-7991: Incorrect calculation of number of rows in JoinStatsRule.process results in overflow (Prasanth J reviewed by Gunther Hagleitner)

Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
    hive/trunk/ql/src/test/results/clientpositive/union20.q.out

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java?rev=1623195&r1=1623194&r2=1623195&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java Sun Sep  7 20:19:36 2014
@@ -840,6 +840,7 @@ public class StatsRulesProcFactory {
 
           Map<String, ColStatistics> joinedColStats = Maps.newHashMap();
           Map<Integer, List<String>> joinKeys = Maps.newHashMap();
+          List<Long> rowCounts = Lists.newArrayList();
 
           // get the join keys from parent ReduceSink operators
           for (int pos = 0; pos < parents.size(); pos++) {
@@ -859,6 +860,7 @@ public class StatsRulesProcFactory {
             for (String tabAlias : tableAliases) {
               rowCountParents.put(tabAlias, parentStats.getNumRows());
             }
+            rowCounts.add(parentStats.getNumRows());
 
             // multi-attribute join key
             if (keyExprs.size() > 1) {
@@ -959,8 +961,7 @@ public class StatsRulesProcFactory {
 
           // update join statistics
           stats.setColumnStats(outColStats);
-          long newRowCount = computeNewRowCount(
-              Lists.newArrayList(rowCountParents.values()), denom);
+          long newRowCount = computeNewRowCount(rowCounts, denom);
 
           updateStatsForJoinType(stats, newRowCount, jop, rowCountParents,
               outInTabAlias);

Modified: hive/trunk/ql/src/test/results/clientpositive/union20.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/union20.q.out?rev=1623195&r1=1623194&r2=1623195&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/union20.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/union20.q.out Sun Sep  7 20:19:36 2014
@@ -132,14 +132,14 @@ STAGE PLANS:
             0 {KEY.reducesinkkey0} {VALUE._col0}
             1 {KEY.reducesinkkey0} {VALUE._col0}
           outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 6 Data size: 1632 Basic stats: COMPLETE Column stats: PARTIAL
+          Statistics: Num rows: 36 Data size: 9792 Basic stats: COMPLETE Column stats: PARTIAL
           Select Operator
             expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
             outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 6 Data size: 1632 Basic stats: COMPLETE Column stats: PARTIAL
+            Statistics: Num rows: 36 Data size: 9792 Basic stats: COMPLETE Column stats: PARTIAL
             File Output Operator
               compressed: false
-              Statistics: Num rows: 6 Data size: 1632 Basic stats: COMPLETE Column stats: PARTIAL
+              Statistics: Num rows: 36 Data size: 9792 Basic stats: COMPLETE Column stats: PARTIAL
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat