You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ac...@apache.org on 2011/04/09 00:35:13 UTC

svn commit: r1090472 - in /hadoop/common/branches/branch-0.20-security: ./ src/mapred/org/apache/hadoop/mapred/ src/webapps/history/

Author: acmurthy
Date: Fri Apr  8 22:35:12 2011
New Revision: 1090472

URL: http://svn.apache.org/viewvc?rev=1090472&view=rev
Log:
MAPREDUCE-2418. Show job errors in JobHistory page. Contributed by Siddharth Seth.

Modified:
    hadoop/common/branches/branch-0.20-security/CHANGES.txt
    hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobClient.java
    hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobHistory.java
    hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobInProgress.java
    hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobTracker.java
    hadoop/common/branches/branch-0.20-security/src/webapps/history/jobdetailshistory.jsp

Modified: hadoop/common/branches/branch-0.20-security/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/CHANGES.txt?rev=1090472&r1=1090471&r2=1090472&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/CHANGES.txt (original)
+++ hadoop/common/branches/branch-0.20-security/CHANGES.txt Fri Apr  8 22:35:12 2011
@@ -4,6 +4,9 @@ Release 0.20.204.0 - unreleased
 
   BUG FIXES
 
+    MAPREDUCE-2418. Show job errors in JobHistory page. (Siddharth Seth via
+    acmurthy) 
+
     HDFS-1592. At Startup, Valid volumes required in FSDataset doesn't
     handle consistently with volumes tolerated. (Bharath Mundlapudi)
 

Modified: hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobClient.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobClient.java?rev=1090472&r1=1090471&r2=1090472&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobClient.java (original)
+++ hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobClient.java Fri Apr  8 22:35:12 2011
@@ -383,7 +383,9 @@ public class JobClient extends Configure
         "file: " + profile.getJobFile() + "\n" + 
         "tracking URL: " + profile.getURL() + "\n" + 
         "map() completion: " + status.mapProgress() + "\n" + 
-        "reduce() completion: " + status.reduceProgress();
+        "reduce() completion: " + status.reduceProgress() + "\n" +
+        ((status.getRunState() == JobStatus.FAILED) ? ("Failure Info: " + status.getFailureInfo()) : "");
+      
     }
         
     /**
@@ -1643,13 +1645,23 @@ public class JobClient extends Configure
         if (job == null) {
           System.out.println("Could not find job " + jobid);
         } else {
-          Counters counters = job.getCounters();
+          Throwable counterException = null;
+          Counters counters = null;
+          try {
+            counters = job.getCounters();
+          } catch (IOException e) {
+            counterException = e;
+          }
           System.out.println();
           System.out.println(job);
           if (counters != null) {
             System.out.println(counters);
           } else {
-            System.out.println("Counters not available. Job is retired.");
+            if (counterException != null) {
+              System.out.println("Error fetching counters: " + counterException.getMessage());
+            } else {
+              System.out.println("Counters not available. Job is retired.");
+            }
           }
           exitCode = 0;
         }
@@ -1658,10 +1670,21 @@ public class JobClient extends Configure
         if (job == null) {
           System.out.println("Could not find job " + jobid);
         } else {
-          Counters counters = job.getCounters();
+          Throwable counterException = null;
+          Counters counters = null;
+          try {
+            counters = job.getCounters();
+          } catch (IOException e) {
+            counterException = e;
+          }
           if (counters == null) {
-            System.out.println("Counters not available for retired job " + 
-                jobid);
+            if (counterException != null) {
+              System.out
+                  .println("Error fetching counters: " + counterException.getMessage());
+            } else {
+              System.out.println("Counters not available for retired job "
+                  + jobid);
+            }
             exitCode = -1;
           } else {
             Group group = counters.getGroup(counterGroupName);

Modified: hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobHistory.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobHistory.java?rev=1090472&r1=1090471&r2=1090472&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobHistory.java (original)
+++ hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobHistory.java Fri Apr  8 22:35:12 2011
@@ -476,7 +476,7 @@ public class JobHistory {
     ERROR, TASK_ATTEMPT_ID, TASK_STATUS, COPY_PHASE, SORT_PHASE, REDUCE_PHASE, 
     SHUFFLE_FINISHED, SORT_FINISHED, COUNTERS, SPLITS, JOB_PRIORITY, HTTP_PORT, 
     TRACKER_NAME, STATE_STRING, VERSION, MAP_COUNTERS, REDUCE_COUNTERS,
-    VIEW_JOB, MODIFY_JOB, JOB_QUEUE
+    VIEW_JOB, MODIFY_JOB, JOB_QUEUE, FAIL_REASON
   }
 
   /**
@@ -1926,14 +1926,14 @@ public class JobHistory {
      * @param finishedMaps no finished map tasks. 
      * @param finishedReduces no of finished reduce tasks. 
      */
-    public static void logFailed(JobID jobid, long timestamp, int finishedMaps, int finishedReduces){
+    public static void logFailed(JobID jobid, long timestamp, int finishedMaps, int finishedReduces, String failReason){
       ArrayList<PrintWriter> writer = fileManager.getWriters(jobid); 
 
       if (null != writer){
         JobHistory.log(writer, RecordTypes.Job,
-                       new Keys[] {Keys.JOBID, Keys.FINISH_TIME, Keys.JOB_STATUS, Keys.FINISHED_MAPS, Keys.FINISHED_REDUCES },
+                       new Keys[] {Keys.JOBID, Keys.FINISH_TIME, Keys.JOB_STATUS, Keys.FINISHED_MAPS, Keys.FINISHED_REDUCES, Keys.FAIL_REASON },
                        new String[] {jobid.toString(),  String.valueOf(timestamp), Values.FAILED.name(), String.valueOf(finishedMaps), 
-                                     String.valueOf(finishedReduces)}, jobid); 
+                                     String.valueOf(finishedReduces), failReason}, jobid); 
         for (PrintWriter out : writer) {
           out.close();
         }

Modified: hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobInProgress.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobInProgress.java?rev=1090472&r1=1090471&r2=1090472&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobInProgress.java (original)
+++ hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobInProgress.java Fri Apr  8 22:35:12 2011
@@ -2724,7 +2724,8 @@ public class JobInProgress {
         // Log to job-history
         JobHistory.JobInfo.logFailed(this.status.getJobID(), finishTime, 
                                      this.finishedMapTasks, 
-                                     this.finishedReduceTasks);
+                                     this.finishedReduceTasks,
+                                     this.status.getFailureInfo());
       } else {
         changeStateTo(JobStatus.KILLED);
 
@@ -3022,6 +3023,19 @@ public class JobInProgress {
             ((++failedReduceTIPs*100) > (reduceFailuresPercent*numReduceTasks));
       
       if (killJob) {
+        String failureInfo = "";
+        if (tip.isJobCleanupTask()) {
+          failureInfo = "JobCleanup Task Failure, Task: " + tip.getTIPId();
+        } else if (tip.isJobSetupTask()) {
+          failureInfo = "JobSetup Task Failure, Task: " + tip.getTIPId();
+        } else if (tip.isMapTask()) {
+          failureInfo = "# of failed Map Tasks exceeded allowed limit. FailedCount: "
+              + failedMapTIPs + ". LastFailedTask: " + tip.getTIPId();
+        } else {
+          failureInfo = "# of failed Reduce Tasks exceeded allowed limit. FailedCount: "
+              + failedReduceTIPs + ". LastFailedTask: " + tip.getTIPId();
+        }
+        this.status.setFailureInfo(failureInfo);
         LOG.info("Aborting job " + profile.getJobID());
         JobHistory.Task.logFailed(tip.getTIPId(), 
                                   taskType,  

Modified: hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobTracker.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobTracker.java?rev=1090472&r1=1090471&r2=1090472&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobTracker.java (original)
+++ hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobTracker.java Fri Apr  8 22:35:12 2011
@@ -1354,8 +1354,10 @@ public class JobTracker implements MRCon
           try {
             initJob(jip);
           } catch (Throwable t) {
-            LOG.error("Job initialization failed : \n" 
-                      + StringUtils.stringifyException(t));
+            LOG.error("Job initialization failed : \n"
+                + StringUtils.stringifyException(t));
+            jip.status.setFailureInfo("Job Initialization failed: \n"
+                + StringUtils.stringifyException(t));
             failJob(jip);
             throw new IOException(t);
           }

Modified: hadoop/common/branches/branch-0.20-security/src/webapps/history/jobdetailshistory.jsp
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/webapps/history/jobdetailshistory.jsp?rev=1090472&r1=1090471&r2=1090472&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/webapps/history/jobdetailshistory.jsp (original)
+++ hadoop/common/branches/branch-0.20-security/src/webapps/history/jobdetailshistory.jsp Fri Apr  8 22:35:12 2011
@@ -56,6 +56,7 @@
 <b>Launched At: </b> <%=StringUtils.getFormattedTimeWithDiff(dateFormat, job.getLong(Keys.LAUNCH_TIME), job.getLong(Keys.SUBMIT_TIME)) %><br/>
 <b>Finished At: </b>  <%=StringUtils.getFormattedTimeWithDiff(dateFormat, job.getLong(Keys.FINISH_TIME), job.getLong(Keys.LAUNCH_TIME)) %><br/>
 <b>Status: </b> <%= ((job.get(Keys.JOB_STATUS) == "")?"Incomplete" :job.get(Keys.JOB_STATUS)) %><br/> 
+<b>Failure Info: </b> <%= ((job.get(Keys.FAIL_REASON) == null)?"NA" : job.get(Keys.FAIL_REASON)) %><br/>
 <%
     Map<String, JobHistory.Task> tasks = job.getAllTasks();
     int totalMaps = 0 ;