You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by sa...@apache.org on 2013/08/23 23:23:10 UTC

svn commit: r1517046 - in /hadoop/common/trunk/hadoop-mapreduce-project: ./ hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/

Author: sandy
Date: Fri Aug 23 21:23:10 2013
New Revision: 1517046

URL: http://svn.apache.org/r1517046
Log:
MAPREDUCE-5478. TeraInputFormat unnecessarily defines its own FileSplit subclass (Sandy Ryza)

Modified:
    hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt
    hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraInputFormat.java
    hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraScheduler.java

Modified: hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt?rev=1517046&r1=1517045&r2=1517046&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt (original)
+++ hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt Fri Aug 23 21:23:10 2013
@@ -181,6 +181,9 @@ Release 2.1.1-beta - UNRELEASED
 
   IMPROVEMENTS
 
+    MAPREDUCE-5478. TeraInputFormat unnecessarily defines its own FileSplit
+    subclass (Sandy Ryza)
+
   OPTIMIZATIONS
 
     MAPREDUCE-5446. TestJobHistoryEvents and TestJobHistoryParsing have race

Modified: hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraInputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraInputFormat.java?rev=1517046&r1=1517045&r2=1517046&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraInputFormat.java (original)
+++ hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraInputFormat.java Fri Aug 23 21:23:10 2013
@@ -60,48 +60,6 @@ public class TeraInputFormat extends Fil
   private static MRJobConfig lastContext = null;
   private static List<InputSplit> lastResult = null;
 
-  static class TeraFileSplit extends FileSplit {
-    static private String[] ZERO_LOCATIONS = new String[0];
-
-    private String[] locations;
-
-    public TeraFileSplit() {
-      locations = ZERO_LOCATIONS;
-    }
-    public TeraFileSplit(Path file, long start, long length, String[] hosts) {
-      super(file, start, length, hosts);
-      try {
-        locations = super.getLocations();
-      } catch (IOException e) {
-        locations = ZERO_LOCATIONS;
-      }
-    }
-
-    // XXXXXX should this also be null-protected?
-    protected void setLocations(String[] hosts) {
-      locations = hosts;
-    }
-
-    @Override
-    public String[] getLocations() {
-      return locations;
-    }
-
-    public String toString() {
-      StringBuffer result = new StringBuffer();
-      result.append(getPath());
-      result.append(" from ");
-      result.append(getStart());
-      result.append(" length ");
-      result.append(getLength());
-      for(String host: getLocations()) {
-        result.append(" ");
-        result.append(host);
-      }
-      return result.toString();
-    }
-  }
-
   static class TextSampler implements IndexedSortable {
     private ArrayList<Text> records = new ArrayList<Text>();
 
@@ -325,11 +283,6 @@ public class TeraInputFormat extends Fil
     return new TeraRecordReader();
   }
 
-  protected FileSplit makeSplit(Path file, long start, long length, 
-                                String[] hosts) {
-    return new TeraFileSplit(file, start, length, hosts);
-  }
-
   @Override
   public List<InputSplit> getSplits(JobContext job) throws IOException {
     if (job == lastContext) {
@@ -343,7 +296,7 @@ public class TeraInputFormat extends Fil
     System.out.println("Spent " + (t2 - t1) + "ms computing base-splits.");
     if (job.getConfiguration().getBoolean(TeraScheduler.USE, true)) {
       TeraScheduler scheduler = new TeraScheduler(
-        lastResult.toArray(new TeraFileSplit[0]), job.getConfiguration());
+        lastResult.toArray(new FileSplit[0]), job.getConfiguration());
       lastResult = scheduler.getNewFileSplits();
       t3 = System.currentTimeMillis(); 
       System.out.println("Spent " + (t3 - t2) + "ms computing TeraScheduler splits.");

Modified: hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraScheduler.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraScheduler.java?rev=1517046&r1=1517045&r2=1517046&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraScheduler.java (original)
+++ hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraScheduler.java Fri Aug 23 21:23:10 2013
@@ -24,7 +24,6 @@ import java.util.*;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.examples.terasort.TeraInputFormat.TeraFileSplit;
 import org.apache.hadoop.mapreduce.InputSplit;
 import org.apache.hadoop.mapreduce.lib.input.FileSplit;
 import org.apache.hadoop.mapreduce.server.tasktracker.TTConfig;
@@ -214,8 +213,9 @@ class TeraScheduler {
     for(int i=0; i < splits.length; ++i) {
       if (splits[i].isAssigned) {
         // copy the split and fix up the locations
-        ((TeraFileSplit) realSplits[i]).setLocations
-           (new String[]{splits[i].locations.get(0).hostname});
+        String[] newLocations = {splits[i].locations.get(0).hostname};
+        realSplits[i] = new FileSplit(realSplits[i].getPath(),
+            realSplits[i].getStart(), realSplits[i].getLength(), newLocations);
         result[left++] = realSplits[i];
       } else {
         result[right--] = realSplits[i];