You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by cu...@apache.org on 2006/06/02 21:52:39 UTC

svn commit: r411277 - in /lucene/hadoop/trunk: ./ src/java/org/apache/hadoop/mapred/ src/java/org/apache/hadoop/util/

Author: cutting
Date: Fri Jun  2 12:52:38 2006
New Revision: 411277

URL: http://svn.apache.org/viewvc?rev=411277&view=rev
Log:
HADOOP-265.  Fix tasktracker to not start when it doesn't have a writable local directory.  Contributed by Hairong.

Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InterTrackerProtocol.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobTracker.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/util/DiskChecker.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=411277&r1=411276&r2=411277&view=diff
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Fri Jun  2 12:52:38 2006
@@ -95,6 +95,10 @@
 25. HADOOP-211.  Switch to use the Jakarta Commons logging internally,
     configured to use log4j by default.  (Arun C Murthy and cutting)
 
+26. HADOOP-265.  Tasktracker now fails to start if it does not have a
+    writable local directory for temporary files.  In this case, it
+    logs a message to the JobTracker and exits. (Hairong Kuang via cutting)  
+
 
 Release 0.2.1 - 2006-05-12
 

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InterTrackerProtocol.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InterTrackerProtocol.java?rev=411277&r1=411276&r2=411277&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InterTrackerProtocol.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InterTrackerProtocol.java Fri Jun  2 12:52:38 2006
@@ -63,6 +63,18 @@
    * files referred to by the JobTracker
    */
   public String getFilesystemName() throws IOException;
+  
+  /**
+   * Report a problem to the job tracker.
+   * @param taskTracker the name of the task tracker reporting the problem
+   * @param errorClass the kind of error (e.g. the class that was thrown)
+   * @param errorMessage the human readable error message
+   * @throws IOException if there was a problem in communication or on the
+   *                     remote side
+   */
+  public void reportTaskTrackerError(String taskTracker,
+                                     String errorClass,
+                                     String errorMessage) throws IOException;
 }
 
 

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobTracker.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobTracker.java?rev=411277&r1=411276&r2=411277&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobTracker.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobTracker.java Fri Jun  2 12:52:38 2006
@@ -835,6 +835,13 @@
         return fs.getName();
     }
 
+    /** Logs an error report sent by a task tracker, including its error class. */
+    public void reportTaskTrackerError(String taskTracker, String errorClass,
+            String errorMessage) throws IOException {
+        LOG.warn("Report from " + taskTracker + ": " + errorClass + ": "
+                + errorMessage);
+    }
+
     ////////////////////////////////////////////////////
     // JobSubmissionProtocol
     ////////////////////////////////////////////////////

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java?rev=411277&r1=411276&r2=411277&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java Fri Jun  2 12:52:38 2006
@@ -20,6 +20,7 @@
 import org.apache.hadoop.fs.*;
 import org.apache.hadoop.ipc.*;
 import org.apache.hadoop.util.*;
+import org.apache.hadoop.util.DiskChecker.DiskErrorException;
 
 import java.io.*;
 import java.net.*;
@@ -102,32 +103,6 @@
     }
     
     /**
-     * Start with the local machine name, and the default JobTracker
-     */
-    public TaskTracker(JobConf conf) throws IOException {
-      maxCurrentTasks = conf.getInt("mapred.tasktracker.tasks.maximum", 2);
-      this.fConf = conf;
-      this.jobTrackAddr = JobTracker.getAddress(conf);
-      this.taskTimeout = conf.getInt("mapred.task.timeout", 10* 60 * 1000);
-      this.mapOutputFile = new MapOutputFile();
-      this.mapOutputFile.setConf(conf);
-      int httpPort = conf.getInt("tasktracker.http.port", 50060);
-      StatusHttpServer server = new StatusHttpServer("task", httpPort, true);
-      int workerThreads = conf.getInt("tasktracker.http.threads", 40);
-      server.setThreads(1, workerThreads);
-      server.start();
-      this.httpPort = server.getPort();
-      // let the jsp pages get to the task tracker, config, and other relevant
-      // objects
-      FileSystem local = FileSystem.getNamed("local", conf);
-      server.setAttribute("task.tracker", this);
-      server.setAttribute("local.file.system", local);
-      server.setAttribute("conf", conf);
-      server.setAttribute("log", LOG);
-      initialize();
-    }
-
-    /**
      * Do the real constructor work here.  It's in a separate method
      * so we can call it again and "recycle" the object after calling
      * close().
@@ -135,6 +110,8 @@
     synchronized void initialize() throws IOException {
         this.localHostname = InetAddress.getLocalHost().getHostName();
 
+        //check local disk
+        checkLocalDirs(this.fConf.getLocalDirs());
         fConf.deleteLocalFiles(SUBDIR);
 
         // Clear out state tables
@@ -219,6 +196,32 @@
     }
 
     /**
+     * Start with the local machine name, and the default JobTracker
+     */
+    public TaskTracker(JobConf conf) throws IOException {
+      // Settings read from the job configuration.
+      maxCurrentTasks = conf.getInt("mapred.tasktracker.tasks.maximum", 2);
+      fConf = conf;
+      jobTrackAddr = JobTracker.getAddress(conf);
+      taskTimeout = conf.getInt("mapred.task.timeout", 10 * 60 * 1000);
+      mapOutputFile = new MapOutputFile();
+      mapOutputFile.setConf(conf);
+      // Bring up the status web server and remember the port it is using.
+      StatusHttpServer statusServer = new StatusHttpServer(
+          "task", conf.getInt("tasktracker.http.port", 50060), true);
+      statusServer.setThreads(1, conf.getInt("tasktracker.http.threads", 40));
+      statusServer.start();
+      httpPort = statusServer.getPort();
+      // Expose the tracker, local file system, config and log to the JSP pages.
+      FileSystem localFs = FileSystem.getNamed("local", conf);
+      statusServer.setAttribute("task.tracker", this);
+      statusServer.setAttribute("local.file.system", localFs);
+      statusServer.setAttribute("conf", conf);
+      statusServer.setAttribute("log", LOG);
+      initialize();
+    }
+
+    /**
      * The connection to the JobTracker, used by the TaskRunner 
      * for locating remote files.
      */
@@ -287,11 +290,17 @@
             //
             try {
               if (mapTotal < maxCurrentTasks || reduceTotal < maxCurrentTasks) {
+                  checkLocalDirs(fConf.getLocalDirs());
                   Task t = jobClient.pollForNewTask(taskTrackerName);
                   if (t != null) {
                     startNewTask(t);
                   }
               }
+            } catch (DiskErrorException de ) {
+                LOG.warn("Exiting task tracker because "+de.getMessage());
+                jobClient.reportTaskTrackerError(taskTrackerName, 
+                        "DiskErrorException", de.getMessage());
+                return STALE_STATE;
             } catch (IOException ie) {
               LOG.info("Problem launching task: " + 
                        StringUtils.stringifyException(ie));
@@ -914,6 +923,33 @@
     }
     
     /**
+     * Check if the given local directories
+     * (and parent directories, if necessary) can be created.
+     * @param localDirs where the new TaskTracker should keep its local files.
+     * @throws DiskErrorException if none of the local directories is writable
+     * @author hairong
+     */
+    private static void checkLocalDirs(String[] localDirs)
+            throws DiskErrorException {
+        int usable = 0;
+        int count = (localDirs == null) ? 0 : localDirs.length;
+        // Probe every directory; one that passes the check is sufficient.
+        for (int idx = 0; idx < count; idx++) {
+            File dir = new File(localDirs[idx]);
+            try {
+                DiskChecker.checkDir(dir);
+                usable++;
+            } catch (DiskErrorException failure) {
+                LOG.warn("Task Tracker local " + failure.getMessage());
+            }
+        }
+        if (usable == 0) {
+            throw new DiskErrorException(
+                    "all local directories are not writable");
+        }
+    }
+    
+    /**
      * Start the TaskTracker, point toward the indicated JobTracker
      */
     public static void main(String argv[]) throws Exception {
@@ -922,7 +958,12 @@
             System.exit(-1);
         }
 
-        JobConf conf=new JobConf();
-        new TaskTracker(conf).run();
+        try {
+          JobConf conf=new JobConf();
+          new TaskTracker(conf).run();
+        } catch (IOException e) {
+            LOG.warn( "Can not start task tracker because "+e.getMessage());
+            System.exit(-1);
+        }
     }
 }

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/util/DiskChecker.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/util/DiskChecker.java?rev=411277&r1=411276&r2=411277&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/util/DiskChecker.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/util/DiskChecker.java Fri Jun  2 12:52:38 2006
@@ -11,7 +11,7 @@
 public class DiskChecker {
 
     public static class DiskErrorException extends IOException {
-      DiskErrorException(String msg) {
+      public DiskErrorException(String msg) { // public: also constructed outside this package
         super(msg);
       }
     }