You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by cu...@apache.org on 2005/07/19 20:22:00 UTC
svn commit: r219745 -
/lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/JobTracker.java
Author: cutting
Date: Tue Jul 19 11:21:59 2005
New Revision: 219745
URL: http://svn.apache.org/viewcvs?rev=219745&view=rev
Log:
Sort splits by decreasing length to minimize the job's tail when mapping.
Modified:
lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/JobTracker.java
Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/JobTracker.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/JobTracker.java?rev=219745&r1=219744&r2=219745&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/JobTracker.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/JobTracker.java Tue Jul 19 11:21:59 2005
@@ -588,6 +588,15 @@
FileSplit[] splits =
jd.getInputFormat().getSplits(fs, jd, numMapTasks);
+ // sort splits by decreasing length, to reduce job's tail
+ Arrays.sort(splits, new Comparator() {
+ public int compare(Object a, Object b) {
+ long diff =
+ ((FileSplit)b).getLength() - ((FileSplit)a).getLength();
+ return diff==0 ? 0 : (diff > 0 ? 1 : -1);
+ }
+ });
+
// adjust number of map tasks to actual number of splits
numMapTasks = splits.length;