You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by dd...@apache.org on 2008/03/21 21:40:39 UTC
svn commit: r639818 [3/3] - in /hadoop/core/branches/branch-0.16: ./ docs/
src/docs/src/documentation/content/xdocs/
src/examples/org/apache/hadoop/examples/ src/java/org/apache/hadoop/mapred/
src/test/org/apache/hadoop/io/ src/test/org/apache/hadoop/m...
Modified: hadoop/core/branches/branch-0.16/src/docs/src/documentation/content/xdocs/mapred_tutorial.xml
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.16/src/docs/src/documentation/content/xdocs/mapred_tutorial.xml?rev=639818&r1=639817&r2=639818&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.16/src/docs/src/documentation/content/xdocs/mapred_tutorial.xml (original)
+++ hadoop/core/branches/branch-0.16/src/docs/src/documentation/content/xdocs/mapred_tutorial.xml Fri Mar 21 13:40:33 2008
@@ -1282,10 +1282,13 @@
<p>The application-writer can take advantage of this feature by
creating any side-files required in <code>${mapred.output.dir}</code>
during execution of a task via
- <a href="ext:api/org/apache/hadoop/mapred/jobconf/getoutputpath">
- JobConf.getOutputPath()</a>, and the framework will promote them
+ <a href="ext:api/org/apache/hadoop/mapred/jobconf/getcurrentoutputpath">
+ JobConf.getCurrentOutputPath()</a>, and the framework will promote them
similarly for succesful task-attempts, thus eliminating the need to
- pick unique paths per task-attempt.</p>
+ pick unique paths per task-attempt. The actual configured path
+ (i.e. the final output path) is available via
+ <a href="ext:api/org/apache/hadoop/mapred/jobconf/getfinaloutputpath">
+ JobConf.getFinalOutputPath()</a>.</p>
</section>
<section>
Modified: hadoop/core/branches/branch-0.16/src/docs/src/documentation/content/xdocs/site.xml
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.16/src/docs/src/documentation/content/xdocs/site.xml?rev=639818&r1=639817&r2=639818&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.16/src/docs/src/documentation/content/xdocs/site.xml (original)
+++ hadoop/core/branches/branch-0.16/src/docs/src/documentation/content/xdocs/site.xml Fri Mar 21 13:40:33 2008
@@ -136,7 +136,8 @@
<setoutputvaluegroupingcomparator href="#setOutputValueGroupingComparator(java.lang.Class)" />
<setinputpath href="#setInputPath(org.apache.hadoop.fs.Path)" />
<addinputpath href="#addInputPath(org.apache.hadoop.fs.Path)" />
- <getoutputpath href="#getOutputPath()" />
+ <getcurrentoutputpath href="#getCurrentOutputPath()" />
+ <getfinaloutputpath href="#getFinalOutputPath()" />
<setoutputpath href="#setOutputPath(org.apache.hadoop.fs.Path)" />
<setcombinerclass href="#setCombinerClass(java.lang.Class)" />
<setmapdebugscript href="#setMapDebugScript(java.lang.String)" />
Modified: hadoop/core/branches/branch-0.16/src/examples/org/apache/hadoop/examples/RandomWriter.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.16/src/examples/org/apache/hadoop/examples/RandomWriter.java?rev=639818&r1=639817&r2=639818&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.16/src/examples/org/apache/hadoop/examples/RandomWriter.java (original)
+++ hadoop/core/branches/branch-0.16/src/examples/org/apache/hadoop/examples/RandomWriter.java Fri Mar 21 13:40:33 2008
@@ -105,7 +105,7 @@
public InputSplit[] getSplits(JobConf job,
int numSplits) throws IOException {
InputSplit[] result = new InputSplit[numSplits];
- Path outDir = job.getOutputPath();
+ Path outDir = job.getCurrentOutputPath();
for(int i=0; i < result.length; ++i) {
result[i] = new FileSplit(new Path(outDir, "dummy-split-" + i), 0, 1, job);
}
Modified: hadoop/core/branches/branch-0.16/src/examples/org/apache/hadoop/examples/Sort.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.16/src/examples/org/apache/hadoop/examples/Sort.java?rev=639818&r1=639817&r2=639818&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.16/src/examples/org/apache/hadoop/examples/Sort.java (original)
+++ hadoop/core/branches/branch-0.16/src/examples/org/apache/hadoop/examples/Sort.java Fri Mar 21 13:40:33 2008
@@ -140,7 +140,7 @@
cluster.getTaskTrackers() +
" nodes to sort from " +
jobConf.getInputPaths()[0] + " into " +
- jobConf.getOutputPath() + " with " + num_reduces + " reduces.");
+ jobConf.getCurrentOutputPath() + " with " + num_reduces + " reduces.");
Date startTime = new Date();
System.out.println("Job started: " + startTime);
JobClient.runJob(jobConf);
Modified: hadoop/core/branches/branch-0.16/src/java/org/apache/hadoop/mapred/JobConf.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.16/src/java/org/apache/hadoop/mapred/JobConf.java?rev=639818&r1=639817&r2=639818&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.16/src/java/org/apache/hadoop/mapred/JobConf.java (original)
+++ hadoop/core/branches/branch-0.16/src/java/org/apache/hadoop/mapred/JobConf.java Fri Mar 21 13:40:33 2008
@@ -353,7 +353,20 @@
}
/**
- * Get the {@link Path} to the output directory for the map-reduce job.
+ * @deprecated Please use {@link #getCurrentOutputPath()}
+ * or {@link #getFinalOutputPath()}
+ *
+ * @return the {@link Path} to the output directory for the map-reduce job.
+ */
+ @Deprecated
+ public Path getOutputPath() {
+ return getCurrentOutputPath();
+ }
+
+ /**
+ * Get the {@link Path} to the output directory for the map-reduce job.
+ * This value is sensitive to task execution: while a task is executing,
+ * it points to the task's temporary output directory.
*
* <h4 id="SideEffectFiles">Tasks' Side-Effect Files</h4>
*
@@ -378,29 +391,45 @@
*
* <p>The application-writer can take advantage of this by creating any
* side-files required in <tt>${mapred.output.dir}</tt> during execution of his
- * reduce-task i.e. via {@link #getOutputPath()}, and the framework will move
- * them out similarly - thus she doesn't have to pick unique paths per
- * task-attempt.</p>
+ * reduce-task i.e. via {@link #getCurrentOutputPath()},
+ * and the framework will move them out similarly
+ * - thus she doesn't have to pick unique paths per task-attempt.</p>
*
* <p><i>Note</i>: the value of <tt>${mapred.output.dir}</tt> during execution
* of a particular task-attempt is actually
* <tt>${mapred.output.dir}/_temporary/_{$taskid}</tt>, not the value set by
* {@link #setOutputPath(Path)}. So, just create any side-files in the path
- * returned by {@link #getOutputPath()} from map/reduce task to take
+ * returned by {@link #getCurrentOutputPath()} from map/reduce task to take
* advantage of this feature.</p>
*
* <p>The entire discussion holds true for maps of jobs with
* reducer=NONE (i.e. 0 reduces) since output of the map, in that case,
* goes directly to HDFS.</p>
*
+ * @see #getFinalOutputPath()
+ *
* @return the {@link Path} to the output directory for the map-reduce job.
*/
- public Path getOutputPath() {
+ public Path getCurrentOutputPath() {
String name = get("mapred.output.dir");
return name == null ? null: new Path(name);
}
/**
+ * Get the {@link Path} to the output directory for the map-reduce job.
+ *
+ * <p>This is the actual configured output path, as set
+ * using {@link #setOutputPath(Path)} for job submission.</p>
+ *
+ * @see #getCurrentOutputPath()
+ * @return the {@link Path} to the output directory for the map-reduce job.
+ */
+ public Path getFinalOutputPath() {
+ String name = get("mapred.final.output.dir");
+ return name == null ? null: new Path(name);
+ }
+
+ /**
* Set the {@link Path} of the output directory for the map-reduce job.
*
* <p><i>Note</i>:
@@ -410,6 +439,8 @@
public void setOutputPath(Path dir) {
dir = new Path(getWorkingDirectory(), dir);
set("mapred.output.dir", dir.toString());
+ if (get("mapred.final.output.dir") == null)
+ set("mapred.final.output.dir", dir.toString());
}
/**
Modified: hadoop/core/branches/branch-0.16/src/java/org/apache/hadoop/mapred/JobInProgress.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.16/src/java/org/apache/hadoop/mapred/JobInProgress.java?rev=639818&r1=639817&r2=639818&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.16/src/java/org/apache/hadoop/mapred/JobInProgress.java (original)
+++ hadoop/core/branches/branch-0.16/src/java/org/apache/hadoop/mapred/JobInProgress.java Fri Mar 21 13:40:33 2008
@@ -277,7 +277,7 @@
}
// create job specific temporary directory in output path
- Path outputPath = conf.getOutputPath();
+ Path outputPath = conf.getCurrentOutputPath();
if (outputPath != null) {
Path tmpDir = new Path(outputPath, MRConstants.TEMP_DIR_NAME);
FileSystem fileSys = tmpDir.getFileSystem(conf);
@@ -1141,7 +1141,7 @@
fs.delete(tempDir);
// delete the temporary directory in output directory
- Path outputPath = conf.getOutputPath();
+ Path outputPath = conf.getCurrentOutputPath();
if (outputPath != null) {
Path tmpDir = new Path(outputPath, MRConstants.TEMP_DIR_NAME);
FileSystem fileSys = tmpDir.getFileSystem(conf);
Modified: hadoop/core/branches/branch-0.16/src/java/org/apache/hadoop/mapred/LocalJobRunner.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.16/src/java/org/apache/hadoop/mapred/LocalJobRunner.java?rev=639818&r1=639817&r2=639818&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.16/src/java/org/apache/hadoop/mapred/LocalJobRunner.java (original)
+++ hadoop/core/branches/branch-0.16/src/java/org/apache/hadoop/mapred/LocalJobRunner.java Fri Mar 21 13:40:33 2008
@@ -114,7 +114,7 @@
job.setNumReduceTasks(1);
}
// create job specific temp directory in output path
- Path outputPath = job.getOutputPath();
+ Path outputPath = job.getCurrentOutputPath();
FileSystem outputFs = null;
Path tmpDir = null;
if (outputPath != null) {
Modified: hadoop/core/branches/branch-0.16/src/java/org/apache/hadoop/mapred/MapFileOutputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.16/src/java/org/apache/hadoop/mapred/MapFileOutputFormat.java?rev=639818&r1=639817&r2=639818&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.16/src/java/org/apache/hadoop/mapred/MapFileOutputFormat.java (original)
+++ hadoop/core/branches/branch-0.16/src/java/org/apache/hadoop/mapred/MapFileOutputFormat.java Fri Mar 21 13:40:33 2008
@@ -42,7 +42,7 @@
String name, Progressable progress)
throws IOException {
- Path outputPath = job.getOutputPath();
+ Path outputPath = job.getCurrentOutputPath();
FileSystem fs = outputPath.getFileSystem(job);
if (!fs.exists(outputPath)) {
throw new IOException("Output directory doesnt exist");
Modified: hadoop/core/branches/branch-0.16/src/java/org/apache/hadoop/mapred/OutputFormatBase.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.16/src/java/org/apache/hadoop/mapred/OutputFormatBase.java?rev=639818&r1=639817&r2=639818&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.16/src/java/org/apache/hadoop/mapred/OutputFormatBase.java (original)
+++ hadoop/core/branches/branch-0.16/src/java/org/apache/hadoop/mapred/OutputFormatBase.java Fri Mar 21 13:40:33 2008
@@ -100,7 +100,7 @@
throws FileAlreadyExistsException,
InvalidJobConfException, IOException {
// Ensure that the output directory is set and not already there
- Path outDir = job.getOutputPath();
+ Path outDir = job.getCurrentOutputPath();
if (outDir == null && job.getNumReduceTasks() != 0) {
throw new InvalidJobConfException("Output directory not set in JobConf.");
}
Modified: hadoop/core/branches/branch-0.16/src/java/org/apache/hadoop/mapred/SequenceFileOutputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.16/src/java/org/apache/hadoop/mapred/SequenceFileOutputFormat.java?rev=639818&r1=639817&r2=639818&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.16/src/java/org/apache/hadoop/mapred/SequenceFileOutputFormat.java (original)
+++ hadoop/core/branches/branch-0.16/src/java/org/apache/hadoop/mapred/SequenceFileOutputFormat.java Fri Mar 21 13:40:33 2008
@@ -40,7 +40,7 @@
String name, Progressable progress)
throws IOException {
- Path outputPath = job.getOutputPath();
+ Path outputPath = job.getCurrentOutputPath();
FileSystem fs = outputPath.getFileSystem(job);
if (!fs.exists(outputPath)) {
throw new IOException("Output directory doesnt exist");
Modified: hadoop/core/branches/branch-0.16/src/java/org/apache/hadoop/mapred/Task.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.16/src/java/org/apache/hadoop/mapred/Task.java?rev=639818&r1=639817&r2=639818&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.16/src/java/org/apache/hadoop/mapred/Task.java (original)
+++ hadoop/core/branches/branch-0.16/src/java/org/apache/hadoop/mapred/Task.java Fri Mar 21 13:40:33 2008
@@ -190,7 +190,7 @@
public String toString() { return taskId; }
private Path getTaskOutputPath(JobConf conf) {
- Path p = new Path(conf.getOutputPath(),
+ Path p = new Path(conf.getCurrentOutputPath(),
(MRConstants.TEMP_DIR_NAME + Path.SEPARATOR + "_" + taskId));
try {
FileSystem fs = p.getFileSystem(conf);
@@ -212,7 +212,7 @@
conf.set("mapred.job.id", jobId);
// The task-specific output path
- if (conf.getOutputPath() != null) {
+ if (conf.getCurrentOutputPath() != null) {
taskOutputPath = getTaskOutputPath(conf);
conf.setOutputPath(taskOutputPath);
}
@@ -397,7 +397,7 @@
this.conf = (JobConf) conf;
if (taskId != null && taskOutputPath == null &&
- this.conf.getOutputPath() != null) {
+ this.conf.getCurrentOutputPath() != null) {
taskOutputPath = getTaskOutputPath(this.conf);
}
} else {
Modified: hadoop/core/branches/branch-0.16/src/java/org/apache/hadoop/mapred/TaskTracker.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.16/src/java/org/apache/hadoop/mapred/TaskTracker.java?rev=639818&r1=639817&r2=639818&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.16/src/java/org/apache/hadoop/mapred/TaskTracker.java (original)
+++ hadoop/core/branches/branch-0.16/src/java/org/apache/hadoop/mapred/TaskTracker.java Fri Mar 21 13:40:33 2008
@@ -1420,7 +1420,7 @@
keepFailedTaskFiles = localJobConf.getKeepFailedTaskFiles();
// create _taskid directory in output path temporary directory.
- Path outputPath = localJobConf.getOutputPath();
+ Path outputPath = localJobConf.getCurrentOutputPath();
if (outputPath != null) {
Path jobTmpDir = new Path(outputPath, MRConstants.TEMP_DIR_NAME);
FileSystem fs = jobTmpDir.getFileSystem(localJobConf);
Modified: hadoop/core/branches/branch-0.16/src/java/org/apache/hadoop/mapred/TextOutputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.16/src/java/org/apache/hadoop/mapred/TextOutputFormat.java?rev=639818&r1=639817&r2=639818&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.16/src/java/org/apache/hadoop/mapred/TextOutputFormat.java (original)
+++ hadoop/core/branches/branch-0.16/src/java/org/apache/hadoop/mapred/TextOutputFormat.java Fri Mar 21 13:40:33 2008
@@ -106,7 +106,7 @@
Progressable progress)
throws IOException {
- Path dir = job.getOutputPath();
+ Path dir = job.getCurrentOutputPath();
FileSystem fs = dir.getFileSystem(job);
if (!fs.exists(dir)) {
throw new IOException("Output directory doesnt exist");
Modified: hadoop/core/branches/branch-0.16/src/test/org/apache/hadoop/io/FileBench.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.16/src/test/org/apache/hadoop/io/FileBench.java?rev=639818&r1=639817&r2=639818&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.16/src/test/org/apache/hadoop/io/FileBench.java (original)
+++ hadoop/core/branches/branch-0.16/src/test/org/apache/hadoop/io/FileBench.java Fri Mar 21 13:40:33 2008
@@ -112,7 +112,7 @@
Text val = new Text();
final String fn = conf.get("test.filebench.name", "");
- final Path outd = conf.getOutputPath();
+ final Path outd = conf.getCurrentOutputPath();
OutputFormat outf = conf.getOutputFormat();
RecordWriter<Text,Text> rw =
outf.getRecordWriter(outd.getFileSystem(conf), conf, fn,
Modified: hadoop/core/branches/branch-0.16/src/test/org/apache/hadoop/mapred/GenericMRLoadGenerator.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.16/src/test/org/apache/hadoop/mapred/GenericMRLoadGenerator.java?rev=639818&r1=639817&r2=639818&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.16/src/test/org/apache/hadoop/mapred/GenericMRLoadGenerator.java (original)
+++ hadoop/core/branches/branch-0.16/src/test/org/apache/hadoop/mapred/GenericMRLoadGenerator.java Fri Mar 21 13:40:33 2008
@@ -140,7 +140,7 @@
return -1;
}
- if (null == job.getOutputPath()) {
+ if (null == job.getCurrentOutputPath()) {
// No output dir? No writes
job.setOutputFormat(NullOutputFormat.class);
}
Modified: hadoop/core/branches/branch-0.16/src/test/org/apache/hadoop/mapred/MRBench.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.16/src/test/org/apache/hadoop/mapred/MRBench.java?rev=639818&r1=639817&r2=639818&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.16/src/test/org/apache/hadoop/mapred/MRBench.java (original)
+++ hadoop/core/branches/branch-0.16/src/test/org/apache/hadoop/mapred/MRBench.java Fri Mar 21 13:40:33 2008
@@ -184,7 +184,7 @@
LOG.info("Running job " + i + ":" +
" input=" + jobConf.getInputPaths()[0] +
- " output=" + jobConf.getOutputPath());
+ " output=" + jobConf.getCurrentOutputPath());
// run the mapred task now
long curTime = System.currentTimeMillis();
Modified: hadoop/core/branches/branch-0.16/src/test/org/apache/hadoop/mapred/SortValidator.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.16/src/test/org/apache/hadoop/mapred/SortValidator.java?rev=639818&r1=639817&r2=639818&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.16/src/test/org/apache/hadoop/mapred/SortValidator.java (original)
+++ hadoop/core/branches/branch-0.16/src/test/org/apache/hadoop/mapred/SortValidator.java Fri Mar 21 13:40:33 2008
@@ -351,7 +351,7 @@
"from " + jobConf.getInputPaths()[0] + " (" +
noSortInputpaths + " files), " +
jobConf.getInputPaths()[1] + " (" + noSortReduceTasks +
- " files) into " + jobConf.getOutputPath() +
+ " files) into " + jobConf.getCurrentOutputPath() +
" with 1 reducer.");
Date startTime = new Date();
System.out.println("Job started: " + startTime);
@@ -492,7 +492,8 @@
System.out.println("\nSortValidator.RecordChecker: Running on " +
cluster.getTaskTrackers() +
" nodes to validate sort from " + jobConf.getInputPaths()[0] + ", " +
- jobConf.getInputPaths()[1] + " into " + jobConf.getOutputPath() +
+ jobConf.getInputPaths()[1] + " into " +
+ jobConf.getCurrentOutputPath() +
" with " + noReduces + " reduces.");
Date startTime = new Date();
System.out.println("Job started: " + startTime);
Modified: hadoop/core/branches/branch-0.16/src/test/org/apache/hadoop/mapred/ThreadedMapBenchmark.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.16/src/test/org/apache/hadoop/mapred/ThreadedMapBenchmark.java?rev=639818&r1=639817&r2=639818&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.16/src/test/org/apache/hadoop/mapred/ThreadedMapBenchmark.java (original)
+++ hadoop/core/branches/branch-0.16/src/test/org/apache/hadoop/mapred/ThreadedMapBenchmark.java Fri Mar 21 13:40:33 2008
@@ -78,7 +78,7 @@
public InputSplit[] getSplits(JobConf job,
int numSplits) throws IOException {
InputSplit[] result = new InputSplit[numSplits];
- Path outDir = job.getOutputPath();
+ Path outDir = job.getCurrentOutputPath();
for(int i=0; i < result.length; ++i) {
result[i] = new FileSplit(new Path(outDir, "dummy-split-" + i), 0, 1,
job);