Posted to common-commits@hadoop.apache.org by cu...@apache.org on 2006/06/26 19:48:11 UTC

svn commit: r417256 - in /lucene/hadoop/trunk: CHANGES.txt src/java/org/apache/hadoop/mapred/InputFormat.java src/java/org/apache/hadoop/mapred/InputFormatBase.java src/java/org/apache/hadoop/mapred/JobClient.java

Author: cutting
Date: Mon Jun 26 10:48:11 2006
New Revision: 417256

URL: http://svn.apache.org/viewvc?rev=417256&view=rev
Log:
HADOOP-278.  Check for the existence of input directories before starting MapReduce jobs, making it easier to debug this common error.  Contributed by Owen.

Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormat.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormatBase.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobClient.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=417256&r1=417255&r2=417256&view=diff
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Mon Jun 26 10:48:11 2006
@@ -47,6 +47,10 @@
 11. HADOOP-135.  Fix potential deadlock in JobTracker by acquiring
     locks in a consistent order.  (omalley via cutting)
 
+12. HADOOP-278.  Check for existence of input directories before
+    starting MapReduce jobs, making it easier to debug this common
+    error.  (omalley via cutting)
+
 
 Release 0.3.2 - 2006-06-09
 

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormat.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormat.java?rev=417256&r1=417255&r2=417256&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormat.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormat.java Mon Jun 26 10:48:11 2006
@@ -19,6 +19,7 @@
 import java.io.IOException;
 
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 
 /** An input data format.  Input files are stored in a {@link FileSystem}.
  * The processing of an input file may be split across multiple machines.
@@ -26,6 +27,18 @@
  * RecordReader}.  Files must thus be split on record boundaries. */
 public interface InputFormat {
 
+  /**
+   * Are the input directories valid? This method is used to test the input
+   * directories when a job is submitted so that the framework can fail early
+   * with a useful error message when the input directory does not exist.
+   * @param fileSys the file system to check for the directories
+   * @param inputDirs the list of input directories
+   * @return is each inputDir valid?
+   * @throws IOException
+   */
+  boolean[] areValidInputDirectories(FileSystem fileSys,
+                                     Path[] inputDirs) throws IOException;
+  
   /** Splits a set of input files.  One split is created per map task.
    *
    * @param fs the filesystem containing the files to be split

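The new interface method lets callers test all configured input directories
up front.  As a rough sketch of that use, an application could run the same
pre-flight check itself before handing the job to the framework; the helper
below is hypothetical and assumes only calls visible in this change plus the
0.3-era FileSystem.get(Configuration):

    import java.io.IOException;

    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapred.JobConf;

    public class InputDirCheck {
      /** Hypothetical helper: true only if every configured input dir is valid. */
      public static boolean inputsLookValid(JobConf job) throws IOException {
        FileSystem fs = FileSystem.get(job);      // file system the job reads from
        Path[] dirs = job.getInputPaths();        // directories set on the JobConf
        boolean[] valid = job.getInputFormat().areValidInputDirectories(fs, dirs);
        for (int i = 0; i < valid.length; ++i) {
          if (!valid[i]) {
            System.err.println("Input directory " + dirs[i] + " is invalid.");
            return false;
          }
        }
        return true;
      }
    }

JobClient performs an equivalent check automatically at submission time, as
shown in the last hunk of this commit.
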
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormatBase.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormatBase.java?rev=417256&r1=417255&r2=417256&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormatBase.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormatBase.java Mon Jun 26 10:48:11 2006
@@ -98,6 +98,16 @@
     return (Path[])result.toArray(new Path[result.size()]);
   }
 
+  public boolean[] areValidInputDirectories(FileSystem fileSys,
+                                            Path[] inputDirs
+                                            ) throws IOException {
+    boolean[] result = new boolean[inputDirs.length];
+    for(int i=0; i < inputDirs.length; ++i) {
+      result[i] = fileSys.isDirectory(inputDirs[i]);
+    }
+    return result;
+  }
+
   /** Splits files returned by {@link #listPaths(FileSystem,JobConf)} when
    * they're too big.*/ 
   public FileSplit[] getSplits(FileSystem fs, JobConf job, int numSplits)

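The default above only verifies that each path is a directory.  As a sketch
of how a format could tighten the check, a subclass might also require each
directory to be non-empty; the class name and the non-empty rule below are
hypothetical, and the sketch assumes the FileSystem.listPaths(Path) call of
this era:

    import java.io.IOException;

    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapred.InputFormatBase;

    /** Hypothetical variant that also rejects empty input directories. */
    public abstract class NonEmptyInputFormat extends InputFormatBase {
      // abstract: getRecordReader is still left to concrete subclasses
      public boolean[] areValidInputDirectories(FileSystem fileSys,
                                                Path[] inputDirs) throws IOException {
        boolean[] result = new boolean[inputDirs.length];
        for (int i = 0; i < inputDirs.length; ++i) {
          if (fileSys.isDirectory(inputDirs[i])) {
            Path[] contents = fileSys.listPaths(inputDirs[i]);   // children of the dir
            result[i] = contents != null && contents.length > 0; // at least one entry
          }
        }
        return result;
      }
    }
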
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobClient.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobClient.java?rev=417256&r1=417255&r2=417256&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobClient.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobClient.java Mon Jun 26 10:48:11 2006
@@ -260,6 +260,17 @@
           job.setWorkingDirectory(fs.getWorkingDirectory());          
         }
 
+        Path[] inputDirs = job.getInputPaths();
+        boolean[] validDirs = 
+          job.getInputFormat().areValidInputDirectories(fs, inputDirs);
+        for(int i=0; i < validDirs.length; ++i) {
+          if (!validDirs[i]) {
+            String msg = "Input directory " + inputDirs[i] + " is invalid.";
+            LOG.error(msg);
+            throw new IOException(msg);
+          }
+        }
+
         // Check the output specification
         job.getOutputFormat().checkOutputSpecs(fs, job);
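
For a job submitter the effect is that a mistyped input path now fails at
submission time, with a message naming the offending directory, instead of
surfacing later in the run.  A minimal, hypothetical driver illustrating
this, assuming the 0.3-era JobConf/JobClient API (setInputPath, setOutputPath,
JobClient.runJob):

    import java.io.IOException;

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapred.JobClient;
    import org.apache.hadoop.mapred.JobConf;

    public class SubmitExample {
      public static void main(String[] args) {
        JobConf job = new JobConf();
        job.setJobName("input-check-demo");
        job.setInputPath(new Path("/user/demo/no-such-dir"));  // hypothetical missing path
        job.setOutputPath(new Path("/user/demo/out"));
        try {
          JobClient.runJob(job);                // submission now fails before any task runs
        } catch (IOException e) {
          // e.g. "Input directory /user/demo/no-such-dir is invalid."
          System.err.println("Job rejected: " + e.getMessage());
        }
      }
    }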