You are viewing a plain text version of this content. (The hyperlink to the canonical version was not preserved in this plain-text copy.)
Posted to mapreduce-commits@hadoop.apache.org by sa...@apache.org on 2013/08/23 23:23:10 UTC
svn commit: r1517046 - in /hadoop/common/trunk/hadoop-mapreduce-project: ./
hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/
Author: sandy
Date: Fri Aug 23 21:23:10 2013
New Revision: 1517046
URL: http://svn.apache.org/r1517046
Log:
MAPREDUCE-5478. TeraInputFormat unnecessarily defines its own FileSplit subclass (Sandy Ryza)
Modified:
hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt
hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraInputFormat.java
hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraScheduler.java
Modified: hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt?rev=1517046&r1=1517045&r2=1517046&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt (original)
+++ hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt Fri Aug 23 21:23:10 2013
@@ -181,6 +181,9 @@ Release 2.1.1-beta - UNRELEASED
IMPROVEMENTS
+ MAPREDUCE-5478. TeraInputFormat unnecessarily defines its own FileSplit
+ subclass (Sandy Ryza)
+
OPTIMIZATIONS
MAPREDUCE-5446. TestJobHistoryEvents and TestJobHistoryParsing have race
Modified: hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraInputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraInputFormat.java?rev=1517046&r1=1517045&r2=1517046&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraInputFormat.java (original)
+++ hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraInputFormat.java Fri Aug 23 21:23:10 2013
@@ -60,48 +60,6 @@ public class TeraInputFormat extends Fil
private static MRJobConfig lastContext = null;
private static List<InputSplit> lastResult = null;
- static class TeraFileSplit extends FileSplit {
- static private String[] ZERO_LOCATIONS = new String[0];
-
- private String[] locations;
-
- public TeraFileSplit() {
- locations = ZERO_LOCATIONS;
- }
- public TeraFileSplit(Path file, long start, long length, String[] hosts) {
- super(file, start, length, hosts);
- try {
- locations = super.getLocations();
- } catch (IOException e) {
- locations = ZERO_LOCATIONS;
- }
- }
-
- // XXXXXX should this also be null-protected?
- protected void setLocations(String[] hosts) {
- locations = hosts;
- }
-
- @Override
- public String[] getLocations() {
- return locations;
- }
-
- public String toString() {
- StringBuffer result = new StringBuffer();
- result.append(getPath());
- result.append(" from ");
- result.append(getStart());
- result.append(" length ");
- result.append(getLength());
- for(String host: getLocations()) {
- result.append(" ");
- result.append(host);
- }
- return result.toString();
- }
- }
-
static class TextSampler implements IndexedSortable {
private ArrayList<Text> records = new ArrayList<Text>();
@@ -325,11 +283,6 @@ public class TeraInputFormat extends Fil
return new TeraRecordReader();
}
- protected FileSplit makeSplit(Path file, long start, long length,
- String[] hosts) {
- return new TeraFileSplit(file, start, length, hosts);
- }
-
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
if (job == lastContext) {
@@ -343,7 +296,7 @@ public class TeraInputFormat extends Fil
System.out.println("Spent " + (t2 - t1) + "ms computing base-splits.");
if (job.getConfiguration().getBoolean(TeraScheduler.USE, true)) {
TeraScheduler scheduler = new TeraScheduler(
- lastResult.toArray(new TeraFileSplit[0]), job.getConfiguration());
+ lastResult.toArray(new FileSplit[0]), job.getConfiguration());
lastResult = scheduler.getNewFileSplits();
t3 = System.currentTimeMillis();
System.out.println("Spent " + (t3 - t2) + "ms computing TeraScheduler splits.");
Modified: hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraScheduler.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraScheduler.java?rev=1517046&r1=1517045&r2=1517046&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraScheduler.java (original)
+++ hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraScheduler.java Fri Aug 23 21:23:10 2013
@@ -24,7 +24,6 @@ import java.util.*;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.examples.terasort.TeraInputFormat.TeraFileSplit;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.server.tasktracker.TTConfig;
@@ -214,8 +213,9 @@ class TeraScheduler {
for(int i=0; i < splits.length; ++i) {
if (splits[i].isAssigned) {
// copy the split and fix up the locations
- ((TeraFileSplit) realSplits[i]).setLocations
- (new String[]{splits[i].locations.get(0).hostname});
+ String[] newLocations = {splits[i].locations.get(0).hostname};
+ realSplits[i] = new FileSplit(realSplits[i].getPath(),
+ realSplits[i].getStart(), realSplits[i].getLength(), newLocations);
result[left++] = realSplits[i];
} else {
result[right--] = realSplits[i];