You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by sh...@apache.org on 2009/07/21 11:27:09 UTC
svn commit: r796210 - in /hadoop/common/branches/branch-0.20: ./
src/mapred/org/apache/hadoop/mapred/ src/test/org/apache/hadoop/mapred/
Author: sharad
Date: Tue Jul 21 09:27:02 2009
New Revision: 796210
URL: http://svn.apache.org/viewvc?rev=796210&view=rev
Log:
MAPREDUCE-430. Reverting the patch.
Modified:
hadoop/common/branches/branch-0.20/CHANGES.txt
hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/Child.java
hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/IsolationRunner.java
hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/LocalJobRunner.java
hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/ReduceTask.java
hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskRunner.java
hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskTracker.java
hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskUmbilicalProtocol.java
hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestTaskFail.java
Modified: hadoop/common/branches/branch-0.20/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/CHANGES.txt?rev=796210&r1=796209&r2=796210&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20/CHANGES.txt (original)
+++ hadoop/common/branches/branch-0.20/CHANGES.txt Tue Jul 21 09:27:02 2009
@@ -184,9 +184,6 @@
MAPREDUCE-18. Puts some checks to detect cases where jetty serves up
incorrect output during shuffle. (Ravi Gummadi via ddas)
- MAPREDUCE-430. Fix bug related to Task getting stuck due to
- OutOfMemoryErrors. (Amar Kamat via sharad)
-
Release 0.20.0 - 2009-04-15
INCOMPATIBLE CHANGES
Modified: hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/Child.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/Child.java?rev=796210&r1=796209&r2=796210&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/Child.java (original)
+++ hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/Child.java Tue Jul 21 09:27:02 2009
@@ -180,13 +180,9 @@
break;
}
}
- } catch (Error e) {
- String error = "Error";
- if (e instanceof FSError) {
- error = "FSError";
- }
- LOG.fatal(error + " from child", e);
- umbilical.taskError(taskid, e.getMessage());
+ } catch (FSError e) {
+ LOG.fatal("FSError from child", e);
+ umbilical.fsError(taskid, e.getMessage());
} catch (Throwable throwable) {
LOG.warn("Error running child", throwable);
try {
Modified: hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/IsolationRunner.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/IsolationRunner.java?rev=796210&r1=796209&r2=796210&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/IsolationRunner.java (original)
+++ hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/IsolationRunner.java Tue Jul 21 09:27:02 2009
@@ -51,9 +51,12 @@
LOG.info("Task " + taskid + " reporting done.");
}
- public void taskError(TaskAttemptID taskId, String message)
- throws IOException {
- LOG.info("Task " + taskId + " reporting task error: " + message);
+ public void fsError(TaskAttemptID taskId, String message) throws IOException {
+ LOG.info("Task " + taskId + " reporting file system error: " + message);
+ }
+
+ public void shuffleError(TaskAttemptID taskId, String message) throws IOException {
+ LOG.info("Task " + taskId + " reporting shuffle error: " + message);
}
public JvmTask getTask(JVMId jvmId) throws IOException {
Modified: hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/LocalJobRunner.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/LocalJobRunner.java?rev=796210&r1=796209&r2=796210&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/LocalJobRunner.java (original)
+++ hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/LocalJobRunner.java Tue Jul 21 09:27:02 2009
@@ -333,9 +333,13 @@
}
}
- public void taskError(TaskAttemptID taskId, String message)
+ public synchronized void fsError(TaskAttemptID taskId, String message)
throws IOException {
- LOG.fatal("Error: "+ message + "from task: " + taskId);
+ LOG.fatal("FSError: "+ message + "from task: " + taskId);
+ }
+
+ public void shuffleError(TaskAttemptID taskId, String message) throws IOException {
+ LOG.fatal("shuffleError: "+ message + "from task: " + taskId);
}
public MapTaskCompletionEventsUpdate getMapCompletionEvents(JobID jobId,
Modified: hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/ReduceTask.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/ReduceTask.java?rev=796210&r1=796209&r2=796210&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/ReduceTask.java (original)
+++ hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/ReduceTask.java Tue Jul 21 09:27:02 2009
@@ -376,8 +376,8 @@
if(reduceCopier.mergeThrowable instanceof FSError) {
LOG.error("Task: " + getTaskID() + " - FSError: " +
StringUtils.stringifyException(reduceCopier.mergeThrowable));
- umbilical.taskError(getTaskID(),
- "(FSError) " + reduceCopier.mergeThrowable.getMessage());
+ umbilical.fsError(getTaskID(),
+ reduceCopier.mergeThrowable.getMessage());
}
throw new IOException("Task: " + getTaskID() +
" - The reduce copier failed", reduceCopier.mergeThrowable);
@@ -1214,8 +1214,7 @@
LOG.error("Task: " + reduceTask.getTaskID() + " - FSError: " +
StringUtils.stringifyException(e));
try {
- umbilical.taskError(reduceTask.getTaskID(), "(FSError) "
- + e.getMessage());
+ umbilical.fsError(reduceTask.getTaskID(), e.getMessage());
} catch (IOException io) {
LOG.error("Could not notify TT of FSError: " +
StringUtils.stringifyException(io));
@@ -2092,9 +2091,9 @@
LOG.fatal("Shuffle failed with too many fetch failures " +
"and insufficient progress!" +
"Killing task " + getTaskID() + ".");
- umbilical.taskError(getTaskID(), "(Shuffle Error) "
- + "Exceeded MAX_FAILED_UNIQUE_FETCHES;"
- + " bailing-out.");
+ umbilical.shuffleError(getTaskID(),
+ "Exceeded MAX_FAILED_UNIQUE_FETCHES;"
+ + " bailing-out.");
}
}
Modified: hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskRunner.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskRunner.java?rev=796210&r1=796209&r2=796210&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskRunner.java (original)
+++ hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskRunner.java Tue Jul 21 09:27:02 2009
@@ -419,16 +419,12 @@
exitCode + ".");
}
}
- } catch (Error e) {
- String error = "Error";
- if (e instanceof FSError) {
- error = "FSError";
- }
- LOG.fatal(error, e);
+ } catch (FSError e) {
+ LOG.fatal("FSError", e);
try {
- tracker.taskError(t.getTaskID(), e.getMessage());
+ tracker.fsError(t.getTaskID(), e.getMessage());
} catch (IOException ie) {
- LOG.fatal(t.getTaskID()+" reporting " + error, ie);
+ LOG.fatal(t.getTaskID()+" reporting FSError", ie);
}
} catch (Throwable throwable) {
LOG.warn(t.getTaskID()+" Child Error", throwable);
Modified: hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskTracker.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskTracker.java?rev=796210&r1=796209&r2=796210&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskTracker.java (original)
+++ hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskTracker.java Tue Jul 21 09:27:02 2009
@@ -2570,13 +2570,24 @@
/**
+ * A reduce-task failed to shuffle the map-outputs. Kill the task.
+ */
+ public synchronized void shuffleError(TaskAttemptID taskId, String message)
+ throws IOException {
+ LOG.fatal("Task: " + taskId + " - Killed due to Shuffle Failure: " + message);
+ TaskInProgress tip = runningTasks.get(taskId);
+ tip.reportDiagnosticInfo("Shuffle Error: " + message);
+ purgeTask(tip, true);
+ }
+
+ /**
* A child task had a local filesystem error. Kill the task.
*/
- public synchronized void taskError(TaskAttemptID taskId, String message)
+ public synchronized void fsError(TaskAttemptID taskId, String message)
throws IOException {
- LOG.fatal("Task: " + taskId + " - Killed due to : " + message);
+ LOG.fatal("Task: " + taskId + " - Killed due to FSError: " + message);
TaskInProgress tip = runningTasks.get(taskId);
- tip.reportDiagnosticInfo(message);
+ tip.reportDiagnosticInfo("FSError: " + message);
purgeTask(tip, true);
}
Modified: hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskUmbilicalProtocol.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskUmbilicalProtocol.java?rev=796210&r1=796209&r2=796210&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskUmbilicalProtocol.java (original)
+++ hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskUmbilicalProtocol.java Tue Jul 21 09:27:02 2009
@@ -53,10 +53,9 @@
* Version 13 changed the getTask method signature for HADOOP-249
* Version 14 changed the getTask method signature for HADOOP-4232
* Version 15 Adds FAILED_UNCLEAN and KILLED_UNCLEAN states for HADOOP-4759
- * Version 16 Removed fsError and shuffleError and introduced taskError.
* */
- public static final long versionID = 16L;
+ public static final long versionID = 15L;
/**
* Called when a child task process starts, to get its task.
@@ -123,8 +122,11 @@
*/
boolean canCommit(TaskAttemptID taskid) throws IOException;
- /** Report that the task encountered an error.*/
- void taskError(TaskAttemptID taskId, String message) throws IOException;
+ /** Report that a reduce-task couldn't shuffle map-outputs.*/
+ void shuffleError(TaskAttemptID taskId, String message) throws IOException;
+
+ /** Report that the task encountered a local filesystem error.*/
+ void fsError(TaskAttemptID taskId, String message) throws IOException;
/** Called by a reduce task to get the map output locations for finished maps.
* Returns an update centered around the map-task-completion-events.
Modified: hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestTaskFail.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestTaskFail.java?rev=796210&r1=796209&r2=796210&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestTaskFail.java (original)
+++ hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestTaskFail.java Tue Jul 21 09:27:02 2009
@@ -49,9 +49,7 @@
throw new IOException();
} else if (taskid.endsWith("_1")) {
System.exit(-1);
- } else if (taskid.endsWith("_2")) {
- throw new OutOfMemoryError();
- }
+ }
}
}
@@ -108,57 +106,46 @@
return new JobClient(conf).submitJob(conf);
}
- private void validateAttempt(TaskInProgress tip, TaskAttemptID attemptId,
- TaskStatus ts, boolean isCleanup)
- throws IOException {
- assertEquals(tip.isCleanupAttempt(attemptId), isCleanup);
- assertTrue(ts != null);
- assertEquals(TaskStatus.State.FAILED, ts.getRunState());
- // validate tasklogs for task attempt
- String log = TestMiniMRMapRedDebugScript.readTaskLog(
- TaskLog.LogName.STDERR, attemptId, false);
- assertTrue(log.contains(taskLog));
- if (!isCleanup) {
- // validate task logs: tasklog should contain both task logs
- // and cleanup logs
- assertTrue(log.contains(cleanupLog));
- } else {
- // validate tasklogs for cleanup attempt
- log = TestMiniMRMapRedDebugScript.readTaskLog(
- TaskLog.LogName.STDERR, attemptId, true);
- assertTrue(log.contains(cleanupLog));
- }
- }
-
private void validateJob(RunningJob job, MiniMRCluster mr)
throws IOException {
assertEquals(JobStatus.SUCCEEDED, job.getJobState());
JobID jobId = job.getID();
// construct the task id of first map task
- // this should not be cleanup attempt since the first attempt
- // fails with an exception
TaskAttemptID attemptId =
new TaskAttemptID(new TaskID(jobId, true, 0), 0);
TaskInProgress tip = mr.getJobTrackerRunner().getJobTracker().
getTip(attemptId.getTaskID());
+ // this should not be cleanup attempt since the first attempt
+ // fails with an exception
+ assertTrue(!tip.isCleanupAttempt(attemptId));
TaskStatus ts =
mr.getJobTrackerRunner().getJobTracker().getTaskStatus(attemptId);
- validateAttempt(tip, attemptId, ts, false);
+ assertTrue(ts != null);
+ assertEquals(TaskStatus.State.FAILED, ts.getRunState());
+ // validate task logs: tasklog should contain both task logs
+ // and cleanup logs
+ String log = TestMiniMRMapRedDebugScript.readTaskLog(
+ TaskLog.LogName.STDERR, attemptId, false);
+ assertTrue(log.contains(taskLog));
+ assertTrue(log.contains(cleanupLog));
attemptId = new TaskAttemptID(new TaskID(jobId, true, 0), 1);
// this should be cleanup attempt since the second attempt fails
// with System.exit
-
+ assertTrue(tip.isCleanupAttempt(attemptId));
ts = mr.getJobTrackerRunner().getJobTracker().getTaskStatus(attemptId);
+ assertTrue(ts != null);
+ assertEquals(TaskStatus.State.FAILED, ts.getRunState());
+ // validate tasklogs for task attempt
+ log = TestMiniMRMapRedDebugScript.readTaskLog(
+ TaskLog.LogName.STDERR, attemptId, false);
+ assertTrue(log.contains(taskLog));
- validateAttempt(tip, attemptId, ts, true);
-
- attemptId = new TaskAttemptID(new TaskID(jobId, true, 0), 2);
- // this should be cleanup attempt since the third attempt fails
- // with OutOfMemory
- ts = mr.getJobTrackerRunner().getJobTracker().getTaskStatus(attemptId);
- validateAttempt(tip, attemptId, ts, true);
+ // validate tasklogs for cleanup attempt
+ log = TestMiniMRMapRedDebugScript.readTaskLog(
+ TaskLog.LogName.STDERR, attemptId, true);
+ assertTrue(log.contains(cleanupLog));
}
public void testWithDFS() throws IOException {