You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by nz...@apache.org on 2011/05/04 21:07:17 UTC
svn commit: r1099560 -
/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java
Author: nzhang
Date: Wed May 4 19:07:17 2011
New Revision: 1099560
URL: http://svn.apache.org/viewvc?rev=1099560&view=rev
Log:
HIVE-2146. Block Sampling should adjust number of reducers accordingly to make it useful (Siying Dong via Ning Zhang)
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java?rev=1099560&r1=1099559&r2=1099560&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java Wed May 4 19:07:17 2011
@@ -368,8 +368,34 @@ public class MapRedTask extends ExecDriv
long totalInputFileSize = inputSummary.getLength();
- LOG.info("BytesPerReducer=" + bytesPerReducer + " maxReducers="
+ // if all inputs are sampled, we should shrink the size of reducers accordingly.
+ double highestSamplePercentage = 0;
+ boolean allSample = false;
+ for (String alias : work.getAliasToWork().keySet()) {
+ if (work.getNameToSplitSample().containsKey(alias)) {
+ allSample = true;
+ double rate = work.getNameToSplitSample().get(alias).getPercent();
+ if (rate > highestSamplePercentage) {
+ highestSamplePercentage = rate;
+ }
+ } else {
+ allSample = false;
+ break;
+ }
+ }
+ if (allSample) {
+ // This is a little bit dangerous if inputs turns out not to be able to be sampled.
+ // In that case, we significantly underestimate number of reducers.
+ // It's the same as other cases of estimateNumberOfReducers(). It's just our best
+ // guess and there is no guarantee.
+ totalInputFileSize = Math.min((long) (totalInputFileSize * highestSamplePercentage / 100D)
+ , totalInputFileSize);
+ LOG.info("BytesPerReducer=" + bytesPerReducer + " maxReducers="
+ + maxReducers + " estimated totalInputFileSize=" + totalInputFileSize);
+ } else {
+ LOG.info("BytesPerReducer=" + bytesPerReducer + " maxReducers="
+ + maxReducers + " totalInputFileSize=" + totalInputFileSize);
+ }
int reducers = (int) ((totalInputFileSize + bytesPerReducer - 1) / bytesPerReducer);
reducers = Math.max(1, reducers);