You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by cu...@apache.org on 2005/07/19 20:22:00 UTC

svn commit: r219745 - /lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/JobTracker.java

Author: cutting
Date: Tue Jul 19 11:21:59 2005
New Revision: 219745

URL: http://svn.apache.org/viewcvs?rev=219745&view=rev
Log:
Sort splits to minimize tail when mapping.

Modified:
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/JobTracker.java

Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/JobTracker.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/JobTracker.java?rev=219745&r1=219744&r2=219745&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/JobTracker.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/JobTracker.java Tue Jul 19 11:21:59 2005
@@ -588,6 +588,15 @@
             FileSplit[] splits =
               jd.getInputFormat().getSplits(fs, jd, numMapTasks);
 
+            // sort splits by decreasing length (longest first), to reduce job's tail
+            Arrays.sort(splits, new Comparator() {
+                public int compare(Object a, Object b) {
+                  long diff =
+                    ((FileSplit)b).getLength() - ((FileSplit)a).getLength();
+                  return diff==0 ? 0 : (diff > 0 ? 1 : -1); // sign of the long diff, as an int
+                }
+              });
+
             // adjust number of map tasks to actual number of splits
             numMapTasks = splits.length;