You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by cu...@apache.org on 2006/06/20 20:52:08 UTC

svn commit: r415765 - in /lucene/hadoop/trunk: ./ src/java/org/apache/hadoop/mapred/ src/webapps/job/

Author: cutting
Date: Tue Jun 20 11:52:07 2006
New Revision: 415765

URL: http://svn.apache.org/viewvc?rev=415765&view=rev
Log:
HADOOP-271.  Add links from jobtracker's web ui to tasktracker's web ui.  Also attempt to log a thread dump of child processes before they're killed.

Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobInProgress.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobTracker.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskStatus.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java
    lucene/hadoop/trunk/src/webapps/job/jobfailures.jsp
    lucene/hadoop/trunk/src/webapps/job/taskdetails.jsp

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=415765&r1=415764&r2=415765&view=diff
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Tue Jun 20 11:52:07 2006
@@ -16,6 +16,10 @@
     tasktracker, greatly increasing code coverage.
     (Milind Bhandarkar via cutting)
 
+ 5. HADOOP-271.  Add links from jobtracker's web ui to tasktracker's
+    web ui.  Also attempt to log a thread dump of child processes
+    before they're killed.  (omalley via cutting)
+
 
 Release 0.3.2 - 2006-06-09
 

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobInProgress.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobInProgress.java?rev=415765&r1=415764&r2=415765&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobInProgress.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobInProgress.java Tue Jun 20 11:52:07 2006
@@ -236,6 +236,14 @@
     }
     
     /**
+     * Get the job configuration
+     * @return the job's configuration
+     */
+    JobConf getJobConf() {
+      return conf;
+    }
+    
+    /**
      * Return a treeset of completed TaskInProgress objects
      */
     public Vector reportTasksInProgress(boolean shouldBeMap, boolean shouldBeComplete) {
@@ -604,7 +612,7 @@
                                           TaskStatus.FAILED,
                                           reason,
                                           reason,
-                                          hostname);
+                                          trackerName);
        failedTask(tip, taskid, status, trackerName);
     }
        

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobTracker.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobTracker.java?rev=415765&r1=415764&r2=415765&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobTracker.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobTracker.java Tue Jun 20 11:52:07 2006
@@ -128,8 +128,11 @@
                   }
                   if (tip != null) {
                      JobInProgress job = tip.getJob();
+                     String trackerName = getAssignedTracker(taskId);
+                     TaskTrackerStatus trackerStatus = 
+                       getTaskTracker(trackerName);
                      job.failedTask(tip, taskId, "Error launching task", 
-                                    "n/a", "n/a");
+                                    trackerStatus.getHost(), trackerName);
                   }
                   itr.remove();
                 } else {
@@ -1029,6 +1032,15 @@
 	return tip.getTaskStatuses();
     }
 
+    /**
+     * Get tracker name for a given task id.
+     * @param taskId the name of the task
+     * @return The name of the task tracker
+     */
+    public synchronized String getAssignedTracker(String taskId) {
+      return (String) taskidToTrackerMap.get(taskId);
+    }
+    
     ///////////////////////////////////////////////////////////////
     // JobTracker methods
     ///////////////////////////////////////////////////////////////
@@ -1055,7 +1067,7 @@
     void updateTaskStatuses(TaskTrackerStatus status) {
         for (Iterator it = status.taskReports(); it.hasNext(); ) {
             TaskStatus report = (TaskStatus) it.next();
-            report.setHostname(status.getHost());
+            report.setTaskTracker(status.getTrackerName());
             String taskId = report.getTaskId();
             TaskInProgress tip = (TaskInProgress) taskidToTIPMap.get(taskId);
             if (tip == null) {

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskStatus.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskStatus.java?rev=415765&r1=415764&r2=415765&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskStatus.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskStatus.java Tue Jun 20 11:52:07 2006
@@ -37,20 +37,20 @@
     private int runState;
     private String diagnosticInfo;
     private String stateString;
-    private String hostname;
+    private String taskTracker;
 
     public TaskStatus() {}
 
     public TaskStatus(String taskid, boolean isMap, float progress,
                       int runState, String diagnosticInfo,
-                      String stateString, String hostname) {
+                      String stateString, String taskTracker) {
         this.taskid = taskid;
         this.isMap = isMap;
         this.progress = progress;
         this.runState = runState;
         this.diagnosticInfo = diagnosticInfo;
         this.stateString = stateString;
-	this.hostname = hostname;
+        this.taskTracker = taskTracker;
     }
     
     public String getTaskId() { return taskid; }
@@ -58,8 +58,8 @@
     public float getProgress() { return progress; }
     public void setProgress(float progress) { this.progress = progress; } 
     public int getRunState() { return runState; }
-    public String getHostname() {return hostname;}
-    public void setHostname(String host) { this.hostname = host;}
+    public String getTaskTracker() {return taskTracker;}
+    public void setTaskTracker(String tracker) { this.taskTracker = tracker;}
     public void setRunState(int runState) { this.runState = runState; }
     public String getDiagnosticInfo() { return diagnosticInfo; }
     public void setDiagnosticInfo(String info) { this.diagnosticInfo = info; }

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java?rev=415765&r1=415764&r2=415765&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java Tue Jun 20 11:52:07 2006
@@ -250,6 +250,28 @@
     }
 
     /**
+     * Whether we are running on an operating system without killall (Windows).
+     */
+    private static boolean isWindows = 
+      System.getProperty("os.name").startsWith("Windows");
+    
+    /**
+     * Ask all java processes on this system to dump their call stacks (SIGQUIT).
+     * Obviously, this is only useful for debugging.
+     */
+    private static void getCallStacks() {
+      if (LOG.isDebugEnabled() && !isWindows) {
+         try {
+          Process proc = 
+            Runtime.getRuntime().exec("killall -QUIT java");
+          proc.waitFor();
+        } catch (IOException ie) {
+          LOG.warn(StringUtils.stringifyException(ie));
+        } catch (InterruptedException ie) {}
+      }
+    }
+    
+    /**
      * Main service loop.  Will stay in this loop forever.
      */
     int offerService() throws Exception {
@@ -341,6 +363,7 @@
                                      (timeSinceLastReport / 1000) + 
                                      " seconds. Killing.";
                         LOG.info(tip.getTask().getTaskId() + ": " + msg);
+                        getCallStacks();
                         tip.reportDiagnosticInfo(msg);
                         try {
                           tip.killAndCleanup(true);
@@ -582,7 +605,13 @@
         /**
          */
         public synchronized TaskStatus createStatus() {
-            TaskStatus status = new TaskStatus(task.getTaskId(), task.isMapTask(), progress, runstate, diagnosticInfo.toString(), (stateString == null) ? "" : stateString, "");
+            TaskStatus status = 
+              new TaskStatus(task.getTaskId(), 
+                             task.isMapTask(),
+                             progress, runstate, 
+                             diagnosticInfo.toString(), 
+                             (stateString == null) ? "" : stateString, 
+                              getName());
             if (diagnosticInfo.length() > 0) {
                 diagnosticInfo = new StringBuffer();
             }
@@ -902,6 +931,7 @@
                     LOG.info("Ping exception: " + msg);
                     remainingRetries -=1;
                     if (remainingRetries == 0) {
+                      getCallStacks();
                       LOG.warn("Last retry, killing "+taskid);
                       System.exit(65);
                     }

Modified: lucene/hadoop/trunk/src/webapps/job/jobfailures.jsp
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/webapps/job/jobfailures.jsp?rev=415765&r1=415764&r2=415765&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/webapps/job/jobfailures.jsp (original)
+++ lucene/hadoop/trunk/src/webapps/job/jobfailures.jsp Tue Jun 20 11:52:07 2006
@@ -20,12 +20,20 @@
     String tipId = tip.getTIPId();
     for(int i=0; i < statuses.length; ++i) {
       if (statuses[i].getRunState() == TaskStatus.FAILED) {
+        String taskTrackerName = statuses[i].getTaskTracker();
+        TaskTrackerStatus taskTracker = tracker.getTaskTracker(taskTrackerName);
         out.print("<tr><td>" + statuses[i].getTaskId() +
                   "</td><td><a href=\"/taskdetails.jsp?jobid="+ jobId + 
                   "&taskid=" + tipId + "\">" + tipId +
-                  "</a></td><td>" + statuses[i].getHostname() +
-                  "</td><td>" + statuses[i].getDiagnosticInfo() +
-                  "</td></tr>\n");
+                  "</a></td>");
+        if (taskTracker == null) {
+          out.print("<td>" + taskTrackerName + "</td>");
+        } else {
+          out.print("<td><a href=\"http://" + taskTracker.getHost() + ":" +
+                    taskTracker.getHttpPort() + "\">" +  taskTracker.getHost() + 
+                    "</a></td>");
+        }
+        out.print("<td>" + statuses[i].getDiagnosticInfo() + "</td></tr>\n");
       }
     }
   }

Modified: lucene/hadoop/trunk/src/webapps/job/taskdetails.jsp
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/webapps/job/taskdetails.jsp?rev=415765&r1=415764&r2=415765&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/webapps/job/taskdetails.jsp (original)
+++ lucene/hadoop/trunk/src/webapps/job/taskdetails.jsp Tue Jun 20 11:52:07 2006
@@ -52,8 +52,16 @@
   <%
     for (int i = 0; i < ts.length; i++) {
       TaskStatus status = ts[i];
+      String taskTrackerName = status.getTaskTracker();
+      TaskTrackerStatus taskTracker = tracker.getTaskTracker(taskTrackerName);
       out.print("<tr><td>" + status.getTaskId() + "</td>");
-      out.print("<td>" + status.getHostname() + "</td>");
+      if (taskTracker == null) {
+        out.print("<td>" + taskTrackerName + "</td>");
+      } else {
+        out.print("<td><a href=\"http://" + taskTracker.getHost() + ":" +
+                  taskTracker.getHttpPort() + "\">" +  taskTracker.getHost() + 
+                  "</a></td>");
+      }
       out.print("<td>");
       writeString(out, status.getRunState()); 
       out.print("</td>");