You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by cu...@apache.org on 2006/06/26 19:48:11 UTC
svn commit: r417256 - in /lucene/hadoop/trunk: CHANGES.txt
src/java/org/apache/hadoop/mapred/InputFormat.java
src/java/org/apache/hadoop/mapred/InputFormatBase.java
src/java/org/apache/hadoop/mapred/JobClient.java
Author: cutting
Date: Mon Jun 26 10:48:11 2006
New Revision: 417256
URL: http://svn.apache.org/viewvc?rev=417256&view=rev
Log:
HADOOP-278. Check for the existence of input directories before starting MapReduce jobs, making it easier to debug this common error. Contributed by Owen.
Modified:
lucene/hadoop/trunk/CHANGES.txt
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormat.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormatBase.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobClient.java
Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=417256&r1=417255&r2=417256&view=diff
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Mon Jun 26 10:48:11 2006
@@ -47,6 +47,10 @@
11. HADOOP-135. Fix potential deadlock in JobTracker by acquiring
locks in a consistent order. (omalley via cutting)
+12. HADOOP-278. Check for existence of input directories before
+ starting MapReduce jobs, making it easier to debug this common
+ error. (omalley via cutting)
+
Release 0.3.2 - 2006-06-09
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormat.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormat.java?rev=417256&r1=417255&r2=417256&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormat.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormat.java Mon Jun 26 10:48:11 2006
@@ -19,6 +19,7 @@
import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
/** An input data format. Input files are stored in a {@link FileSystem}.
* The processing of an input file may be split across multiple machines.
@@ -26,6 +27,18 @@
* RecordReader}. Files must thus be split on record boundaries. */
public interface InputFormat {
+ /**
+ * Are the input directories valid? This method is used to test the input
+ * directories when a job is submitted so that the framework can fail early
+ * with a useful error message when the input directory does not exist.
+ * @param fileSys the file system to check for the directories
+ * @param inputDirs the list of input directories
+ * @return one flag per entry of <code>inputDirs</code>, in the same order;
+ * <code>true</code> means that input directory is valid
+ * @throws IOException NOTE(review): presumably raised when the file system
+ * check itself fails -- confirm against implementations
+ */
+ boolean[] areValidInputDirectories(FileSystem fileSys,
+ Path[] inputDirs) throws IOException;
+
/** Splits a set of input files. One split is created per map task.
*
* @param fs the filesystem containing the files to be split
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormatBase.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormatBase.java?rev=417256&r1=417255&r2=417256&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormatBase.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormatBase.java Mon Jun 26 10:48:11 2006
@@ -98,6 +98,16 @@
return (Path[])result.toArray(new Path[result.size()]);
}
+ public boolean[] areValidInputDirectories(FileSystem fileSys,
+ Path[] inputDirs
+ ) throws IOException { // InputFormatBase's check: a path is valid iff fileSys.isDirectory(path) is true
+ boolean[] result = new boolean[inputDirs.length]; // one flag per input path, at the same index
+ for(int i=0; i < inputDirs.length; ++i) {
+ result[i] = fileSys.isDirectory(inputDirs[i]); // any path isDirectory rejects is reported invalid
+ }
+ return result;
+ }
+
/** Splits files returned by {@link #listPaths(FileSystem,JobConf)} when
* they're too big.*/
public FileSplit[] getSplits(FileSystem fs, JobConf job, int numSplits)
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobClient.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobClient.java?rev=417256&r1=417255&r2=417256&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobClient.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobClient.java Mon Jun 26 10:48:11 2006
@@ -260,6 +260,17 @@
job.setWorkingDirectory(fs.getWorkingDirectory());
}
+ Path[] inputDirs = job.getInputPaths();
+ boolean[] validDirs =
+ job.getInputFormat().areValidInputDirectories(fs, inputDirs);
+ for(int i=0; i < validDirs.length; ++i) {
+ if (!validDirs[i]) {
+ String msg = "Input directory " + inputDirs[i] + " is invalid.";
+ LOG.error(msg);
+ throw new IOException(msg);
+ }
+ }
+
// Check the output specification
job.getOutputFormat().checkOutputSpecs(fs, job);