You are viewing a plain-text version of this content; the hyperlink to the canonical version was not preserved in this copy.
Posted to common-commits@hadoop.apache.org by ac...@apache.org on 2011/04/09 00:35:13 UTC
svn commit: r1090472 - in /hadoop/common/branches/branch-0.20-security: ./
src/mapred/org/apache/hadoop/mapred/ src/webapps/history/
Author: acmurthy
Date: Fri Apr 8 22:35:12 2011
New Revision: 1090472
URL: http://svn.apache.org/viewvc?rev=1090472&view=rev
Log:
MAPREDUCE-2418. Show job errors in JobHistory page. Contributed by Siddharth Seth.
Modified:
hadoop/common/branches/branch-0.20-security/CHANGES.txt
hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobClient.java
hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobHistory.java
hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobInProgress.java
hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobTracker.java
hadoop/common/branches/branch-0.20-security/src/webapps/history/jobdetailshistory.jsp
Modified: hadoop/common/branches/branch-0.20-security/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/CHANGES.txt?rev=1090472&r1=1090471&r2=1090472&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/CHANGES.txt (original)
+++ hadoop/common/branches/branch-0.20-security/CHANGES.txt Fri Apr 8 22:35:12 2011
@@ -4,6 +4,9 @@ Release 0.20.204.0 - unreleased
BUG FIXES
+ MAPREDUCE-2418. Show job errors in JobHistory page. (Siddharth Seth via
+ acmurthy)
+
HDFS-1592. At Startup, Valid volumes required in FSDataset doesn't
handle consistently with volumes tolerated. (Bharath Mundlapudi)
Modified: hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobClient.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobClient.java?rev=1090472&r1=1090471&r2=1090472&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobClient.java (original)
+++ hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobClient.java Fri Apr 8 22:35:12 2011
@@ -383,7 +383,9 @@ public class JobClient extends Configure
"file: " + profile.getJobFile() + "\n" +
"tracking URL: " + profile.getURL() + "\n" +
"map() completion: " + status.mapProgress() + "\n" +
- "reduce() completion: " + status.reduceProgress();
+ "reduce() completion: " + status.reduceProgress() + "\n" +
+ ((status.getRunState() == JobStatus.FAILED) ? ("Failure Info: " + status.getFailureInfo()) : "");
+
}
/**
@@ -1643,13 +1645,23 @@ public class JobClient extends Configure
if (job == null) {
System.out.println("Could not find job " + jobid);
} else {
- Counters counters = job.getCounters();
+ Throwable counterException = null;
+ Counters counters = null;
+ try {
+ counters = job.getCounters();
+ } catch (IOException e) {
+ counterException = e;
+ }
System.out.println();
System.out.println(job);
if (counters != null) {
System.out.println(counters);
} else {
- System.out.println("Counters not available. Job is retired.");
+ if (counterException != null) {
+ System.out.println("Error fetching counters: " + counterException.getMessage());
+ } else {
+ System.out.println("Counters not available. Job is retired.");
+ }
}
exitCode = 0;
}
@@ -1658,10 +1670,21 @@ public class JobClient extends Configure
if (job == null) {
System.out.println("Could not find job " + jobid);
} else {
- Counters counters = job.getCounters();
+ Throwable counterException = null;
+ Counters counters = null;
+ try {
+ counters = job.getCounters();
+ } catch (IOException e) {
+ counterException = e;
+ }
if (counters == null) {
- System.out.println("Counters not available for retired job " +
- jobid);
+ if (counterException != null) {
+ System.out
+ .println("Error fetching counters: " + counterException.getMessage());
+ } else {
+ System.out.println("Counters not available for retired job "
+ + jobid);
+ }
exitCode = -1;
} else {
Group group = counters.getGroup(counterGroupName);
Modified: hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobHistory.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobHistory.java?rev=1090472&r1=1090471&r2=1090472&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobHistory.java (original)
+++ hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobHistory.java Fri Apr 8 22:35:12 2011
@@ -476,7 +476,7 @@ public class JobHistory {
ERROR, TASK_ATTEMPT_ID, TASK_STATUS, COPY_PHASE, SORT_PHASE, REDUCE_PHASE,
SHUFFLE_FINISHED, SORT_FINISHED, COUNTERS, SPLITS, JOB_PRIORITY, HTTP_PORT,
TRACKER_NAME, STATE_STRING, VERSION, MAP_COUNTERS, REDUCE_COUNTERS,
- VIEW_JOB, MODIFY_JOB, JOB_QUEUE
+ VIEW_JOB, MODIFY_JOB, JOB_QUEUE, FAIL_REASON
}
/**
@@ -1926,14 +1926,14 @@ public class JobHistory {
* @param finishedMaps no finished map tasks.
* @param finishedReduces no of finished reduce tasks.
*/
- public static void logFailed(JobID jobid, long timestamp, int finishedMaps, int finishedReduces){
+ public static void logFailed(JobID jobid, long timestamp, int finishedMaps, int finishedReduces, String failReason){
ArrayList<PrintWriter> writer = fileManager.getWriters(jobid);
if (null != writer){
JobHistory.log(writer, RecordTypes.Job,
- new Keys[] {Keys.JOBID, Keys.FINISH_TIME, Keys.JOB_STATUS, Keys.FINISHED_MAPS, Keys.FINISHED_REDUCES },
+ new Keys[] {Keys.JOBID, Keys.FINISH_TIME, Keys.JOB_STATUS, Keys.FINISHED_MAPS, Keys.FINISHED_REDUCES, Keys.FAIL_REASON },
new String[] {jobid.toString(), String.valueOf(timestamp), Values.FAILED.name(), String.valueOf(finishedMaps),
- String.valueOf(finishedReduces)}, jobid);
+ String.valueOf(finishedReduces), failReason}, jobid);
for (PrintWriter out : writer) {
out.close();
}
Modified: hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobInProgress.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobInProgress.java?rev=1090472&r1=1090471&r2=1090472&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobInProgress.java (original)
+++ hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobInProgress.java Fri Apr 8 22:35:12 2011
@@ -2724,7 +2724,8 @@ public class JobInProgress {
// Log to job-history
JobHistory.JobInfo.logFailed(this.status.getJobID(), finishTime,
this.finishedMapTasks,
- this.finishedReduceTasks);
+ this.finishedReduceTasks,
+ this.status.getFailureInfo());
} else {
changeStateTo(JobStatus.KILLED);
@@ -3022,6 +3023,19 @@ public class JobInProgress {
((++failedReduceTIPs*100) > (reduceFailuresPercent*numReduceTasks));
if (killJob) {
+ String failureInfo = "";
+ if (tip.isJobCleanupTask()) {
+ failureInfo = "JobCleanup Task Failure, Task: " + tip.getTIPId();
+ } else if (tip.isJobSetupTask()) {
+ failureInfo = "JobSetup Task Failure, Task: " + tip.getTIPId();
+ } else if (tip.isMapTask()) {
+ failureInfo = "# of failed Map Tasks exceeded allowed limit. FailedCount: "
+ + failedMapTIPs + ". LastFailedTask: " + tip.getTIPId();
+ } else {
+ failureInfo = "# of failed Reduce Tasks exceeded allowed limit. FailedCount: "
+ + failedReduceTIPs + ". LastFailedTask: " + tip.getTIPId();
+ }
+ this.status.setFailureInfo(failureInfo);
LOG.info("Aborting job " + profile.getJobID());
JobHistory.Task.logFailed(tip.getTIPId(),
taskType,
Modified: hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobTracker.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobTracker.java?rev=1090472&r1=1090471&r2=1090472&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobTracker.java (original)
+++ hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/JobTracker.java Fri Apr 8 22:35:12 2011
@@ -1354,8 +1354,10 @@ public class JobTracker implements MRCon
try {
initJob(jip);
} catch (Throwable t) {
- LOG.error("Job initialization failed : \n"
- + StringUtils.stringifyException(t));
+ LOG.error("Job initialization failed : \n"
+ + StringUtils.stringifyException(t));
+ jip.status.setFailureInfo("Job Initialization failed: \n"
+ + StringUtils.stringifyException(t));
failJob(jip);
throw new IOException(t);
}
Modified: hadoop/common/branches/branch-0.20-security/src/webapps/history/jobdetailshistory.jsp
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/webapps/history/jobdetailshistory.jsp?rev=1090472&r1=1090471&r2=1090472&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/webapps/history/jobdetailshistory.jsp (original)
+++ hadoop/common/branches/branch-0.20-security/src/webapps/history/jobdetailshistory.jsp Fri Apr 8 22:35:12 2011
@@ -56,6 +56,7 @@
<b>Launched At: </b> <%=StringUtils.getFormattedTimeWithDiff(dateFormat, job.getLong(Keys.LAUNCH_TIME), job.getLong(Keys.SUBMIT_TIME)) %><br/>
<b>Finished At: </b> <%=StringUtils.getFormattedTimeWithDiff(dateFormat, job.getLong(Keys.FINISH_TIME), job.getLong(Keys.LAUNCH_TIME)) %><br/>
<b>Status: </b> <%= ((job.get(Keys.JOB_STATUS) == "")?"Incomplete" :job.get(Keys.JOB_STATUS)) %><br/>
+<b>Failure Info: </b> <%= ((job.get(Keys.FAIL_REASON) == null)?"NA" : job.get(Keys.FAIL_REASON)) %><br/>
<%
Map<String, JobHistory.Task> tasks = job.getAllTasks();
int totalMaps = 0 ;