You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by cu...@apache.org on 2006/06/02 21:52:39 UTC
svn commit: r411277 - in /lucene/hadoop/trunk: ./
src/java/org/apache/hadoop/mapred/ src/java/org/apache/hadoop/util/
Author: cutting
Date: Fri Jun 2 12:52:38 2006
New Revision: 411277
URL: http://svn.apache.org/viewvc?rev=411277&view=rev
Log:
HADOOP-265. Fix tasktracker to not start when it doesn't have a writable local directory. Contributed by Hairong.
Modified:
lucene/hadoop/trunk/CHANGES.txt
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InterTrackerProtocol.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobTracker.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/util/DiskChecker.java
Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=411277&r1=411276&r2=411277&view=diff
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Fri Jun 2 12:52:38 2006
@@ -95,6 +95,10 @@
25. HADOOP-211. Switch to use the Jakarta Commons logging internally,
configured to use log4j by default. (Arun C Murthy and cutting)
+26. HADOOP-265. Tasktracker now fails to start if it does not have a
+ writable local directory for temporary files. In this case, it
+ logs a message to the JobTracker and exits. (Hairong Kuang via cutting)
+
Release 0.2.1 - 2006-05-12
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InterTrackerProtocol.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InterTrackerProtocol.java?rev=411277&r1=411276&r2=411277&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InterTrackerProtocol.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InterTrackerProtocol.java Fri Jun 2 12:52:38 2006
@@ -63,6 +63,18 @@
* files referred to by the JobTracker
*/
public String getFilesystemName() throws IOException;
+
+ /**
+ * Report a problem to the job tracker.
+ * @param taskTracker the name of the task tracker
+ * @param errorClass the kind of error (e.g. the class of the exception that was thrown)
+ * @param errorMessage the human readable error message
+ * @throws IOException if there was a problem in communication or on the
+ * remote side
+ */
+ public void reportTaskTrackerError(String taskTracker,
+ String errorClass,
+ String errorMessage) throws IOException;
}
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobTracker.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobTracker.java?rev=411277&r1=411276&r2=411277&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobTracker.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobTracker.java Fri Jun 2 12:52:38 2006
@@ -835,6 +835,13 @@
return fs.getName();
}
+
+ public void reportTaskTrackerError(String taskTracker,
+ String errorClass,
+ String errorMessage) throws IOException {
+ LOG.warn("Report from " + taskTracker + ": " + errorMessage);
+ }
+
////////////////////////////////////////////////////
// JobSubmissionProtocol
////////////////////////////////////////////////////
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java?rev=411277&r1=411276&r2=411277&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java Fri Jun 2 12:52:38 2006
@@ -20,6 +20,7 @@
import org.apache.hadoop.fs.*;
import org.apache.hadoop.ipc.*;
import org.apache.hadoop.util.*;
+import org.apache.hadoop.util.DiskChecker.DiskErrorException;
import java.io.*;
import java.net.*;
@@ -102,32 +103,6 @@
}
/**
- * Start with the local machine name, and the default JobTracker
- */
- public TaskTracker(JobConf conf) throws IOException {
- maxCurrentTasks = conf.getInt("mapred.tasktracker.tasks.maximum", 2);
- this.fConf = conf;
- this.jobTrackAddr = JobTracker.getAddress(conf);
- this.taskTimeout = conf.getInt("mapred.task.timeout", 10* 60 * 1000);
- this.mapOutputFile = new MapOutputFile();
- this.mapOutputFile.setConf(conf);
- int httpPort = conf.getInt("tasktracker.http.port", 50060);
- StatusHttpServer server = new StatusHttpServer("task", httpPort, true);
- int workerThreads = conf.getInt("tasktracker.http.threads", 40);
- server.setThreads(1, workerThreads);
- server.start();
- this.httpPort = server.getPort();
- // let the jsp pages get to the task tracker, config, and other relevant
- // objects
- FileSystem local = FileSystem.getNamed("local", conf);
- server.setAttribute("task.tracker", this);
- server.setAttribute("local.file.system", local);
- server.setAttribute("conf", conf);
- server.setAttribute("log", LOG);
- initialize();
- }
-
- /**
* Do the real constructor work here. It's in a separate method
* so we can call it again and "recycle" the object after calling
* close().
@@ -135,6 +110,8 @@
synchronized void initialize() throws IOException {
this.localHostname = InetAddress.getLocalHost().getHostName();
+ //check local disk
+ checkLocalDirs(this.fConf.getLocalDirs());
fConf.deleteLocalFiles(SUBDIR);
// Clear out state tables
@@ -219,6 +196,32 @@
}
/**
+ * Start with the local machine name, and the default JobTracker
+ */
+ public TaskTracker(JobConf conf) throws IOException {
+ maxCurrentTasks = conf.getInt("mapred.tasktracker.tasks.maximum", 2);
+ this.fConf = conf;
+ this.jobTrackAddr = JobTracker.getAddress(conf);
+ this.taskTimeout = conf.getInt("mapred.task.timeout", 10* 60 * 1000);
+ this.mapOutputFile = new MapOutputFile();
+ this.mapOutputFile.setConf(conf);
+ int httpPort = conf.getInt("tasktracker.http.port", 50060);
+ StatusHttpServer server = new StatusHttpServer("task", httpPort, true);
+ int workerThreads = conf.getInt("tasktracker.http.threads", 40);
+ server.setThreads(1, workerThreads);
+ server.start();
+ this.httpPort = server.getPort();
+ // let the jsp pages get to the task tracker, config, and other relevant
+ // objects
+ FileSystem local = FileSystem.getNamed("local", conf);
+ server.setAttribute("task.tracker", this);
+ server.setAttribute("local.file.system", local);
+ server.setAttribute("conf", conf);
+ server.setAttribute("log", LOG);
+ initialize();
+ }
+
+ /**
* The connection to the JobTracker, used by the TaskRunner
* for locating remote files.
*/
@@ -287,11 +290,17 @@
//
try {
if (mapTotal < maxCurrentTasks || reduceTotal < maxCurrentTasks) {
+ checkLocalDirs(fConf.getLocalDirs());
Task t = jobClient.pollForNewTask(taskTrackerName);
if (t != null) {
startNewTask(t);
}
}
+ } catch (DiskErrorException de ) {
+ LOG.warn("Exiting task tracker because "+de.getMessage());
+ jobClient.reportTaskTrackerError(taskTrackerName,
+ "DiskErrorException", de.getMessage());
+ return STALE_STATE;
} catch (IOException ie) {
LOG.info("Problem launching task: " +
StringUtils.stringifyException(ie));
@@ -914,6 +923,33 @@
}
/**
+ * Check if the given local directories
+ * (and parent directories, if necessary) can be created.
+ * @param localDirs where the new TaskTracker should keep its local files.
+ * @throws DiskErrorException if none of the local directories is writable
+ * @author hairong
+ */
+ private static void checkLocalDirs( String[] localDirs )
+ throws DiskErrorException {
+ boolean writable = false;
+
+ if( localDirs != null ) {
+ for (int i = 0; i < localDirs.length; i++) {
+ try {
+ DiskChecker.checkDir( new File(localDirs[i]) );
+ writable = true;
+ } catch( DiskErrorException e ) {
+ LOG.warn("Task Tracker local " + e.getMessage() );
+ }
+ }
+ }
+
+ if( !writable )
+ throw new DiskErrorException(
+ "all local directories are not writable" );
+ }
+
+ /**
* Start the TaskTracker, point toward the indicated JobTracker
*/
public static void main(String argv[]) throws Exception {
@@ -922,7 +958,12 @@
System.exit(-1);
}
- JobConf conf=new JobConf();
- new TaskTracker(conf).run();
+ try {
+ JobConf conf=new JobConf();
+ new TaskTracker(conf).run();
+ } catch (IOException e) {
+ LOG.warn( "Can not start task tracker because "+e.getMessage());
+ System.exit(-1);
+ }
}
}
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/util/DiskChecker.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/util/DiskChecker.java?rev=411277&r1=411276&r2=411277&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/util/DiskChecker.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/util/DiskChecker.java Fri Jun 2 12:52:38 2006
@@ -11,7 +11,7 @@
public class DiskChecker {
public static class DiskErrorException extends IOException {
- DiskErrorException(String msg) {
+ public DiskErrorException(String msg) {
super(msg);
}
}