You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by cu...@apache.org on 2007/01/09 22:36:24 UTC

svn commit: r494602 - in /lucene/hadoop/trunk: CHANGES.txt src/java/org/apache/hadoop/mapred/MapTask.java

Author: cutting
Date: Tue Jan  9 13:36:24 2007
New Revision: 494602

URL: http://svn.apache.org/viewvc?view=rev&rev=494602
Log:
HADOOP-868.  Decrease the number of files opened during map.  Contributed by Devaraj Das.

Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapTask.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?view=diff&rev=494602&r1=494601&r2=494602
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Tue Jan  9 13:36:24 2007
@@ -23,6 +23,9 @@
  7. HADOOP-871.  Fix a bug in bin/hadoop setting JAVA_LIBRARY_PATH.
     (Arun C Murthy via cutting)
 
+ 8. HADOOP-868.  Decrease the number of open files during map,
+    respecting io.sort.factor.  (Devaraj Das via cutting)
+
 
 Release 0.10.0 - 2007-01-05
 

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapTask.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapTask.java?view=diff&rev=494602&r1=494601&r2=494602
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapTask.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapTask.java Tue Jan  9 13:36:24 2007
@@ -467,26 +467,24 @@
       {
         Path [] filename = new Path[numSpills];
         Path [] indexFileName = new Path[numSpills];
-        FSDataInputStream in[] = new FSDataInputStream[numSpills];
-        FSDataInputStream indexIn[] = new FSDataInputStream[numSpills];
         
         for(int i = 0; i < numSpills; i++) {
           filename[i] = mapOutputFile.getSpillFile(getTaskId(), i);
-          in[i] = localFs.open(filename[i]);
           indexFileName[i] = mapOutputFile.getSpillIndexFile(getTaskId(), i);
-          indexIn[i] = localFs.open(indexFileName[i]);
         }
         
         //create a sorter object as we need access to the SegmentDescriptor
         //class and merge methods
         Sorter sorter = new Sorter(localFs, keyClass, valClass, job);
-        sorter.setFactor(numSpills);
         
         for (int parts = 0; parts < partitions; parts++){
           List<SegmentDescriptor> segmentList = new ArrayList(numSpills);
           for(int i = 0; i < numSpills; i++) {
-            long segmentOffset = indexIn[i].readLong();
-            long segmentLength = indexIn[i].readLong();
+            FSDataInputStream indexIn = localFs.open(indexFileName[i]);
+            indexIn.seek(parts * 16);
+            long segmentOffset = indexIn.readLong();
+            long segmentLength = indexIn.readLong();
+            indexIn.close();
             SegmentDescriptor s = sorter.new SegmentDescriptor(segmentOffset,
                 segmentLength, filename[i]);
             s.preserveInput(true);
@@ -513,8 +511,8 @@
         finalIndexOut.close();
         //cleanup
         for(int i = 0; i < numSpills; i++) {
-          in[i].close(); localFs.delete(filename[i]);
-          indexIn[i].close(); localFs.delete(indexFileName[i]);
+          localFs.delete(filename[i]);
+          localFs.delete(indexFileName[i]);
         }
       }
     }