You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by sh...@apache.org on 2009/07/21 11:27:09 UTC

svn commit: r796210 - in /hadoop/common/branches/branch-0.20: ./ src/mapred/org/apache/hadoop/mapred/ src/test/org/apache/hadoop/mapred/

Author: sharad
Date: Tue Jul 21 09:27:02 2009
New Revision: 796210

URL: http://svn.apache.org/viewvc?rev=796210&view=rev
Log:
MAPREDUCE-430. Reverting the patch.

Modified:
    hadoop/common/branches/branch-0.20/CHANGES.txt
    hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/Child.java
    hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/IsolationRunner.java
    hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/LocalJobRunner.java
    hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/ReduceTask.java
    hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskRunner.java
    hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskTracker.java
    hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskUmbilicalProtocol.java
    hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestTaskFail.java

Modified: hadoop/common/branches/branch-0.20/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/CHANGES.txt?rev=796210&r1=796209&r2=796210&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20/CHANGES.txt (original)
+++ hadoop/common/branches/branch-0.20/CHANGES.txt Tue Jul 21 09:27:02 2009
@@ -184,9 +184,6 @@
     MAPREDUCE-18. Puts some checks to detect cases where jetty serves up
     incorrect output during shuffle. (Ravi Gummadi via ddas)
 
-    MAPREDUCE-430. Fix bug related to Task getting stuck due to
-    OutOfMemoryErrors. (Amar Kamat via sharad)
-
 Release 0.20.0 - 2009-04-15
 
   INCOMPATIBLE CHANGES

Modified: hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/Child.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/Child.java?rev=796210&r1=796209&r2=796210&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/Child.java (original)
+++ hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/Child.java Tue Jul 21 09:27:02 2009
@@ -180,13 +180,9 @@
           break;
         }
       }
-    } catch (Error e) {
-      String error = "Error";
-      if (e instanceof FSError) {
-       error = "FSError";
-      }
-      LOG.fatal(error + " from child", e);
-      umbilical.taskError(taskid, e.getMessage());
+    } catch (FSError e) {
+      LOG.fatal("FSError from child", e);
+      umbilical.fsError(taskid, e.getMessage());
     } catch (Throwable throwable) {
       LOG.warn("Error running child", throwable);
       try {

Modified: hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/IsolationRunner.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/IsolationRunner.java?rev=796210&r1=796209&r2=796210&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/IsolationRunner.java (original)
+++ hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/IsolationRunner.java Tue Jul 21 09:27:02 2009
@@ -51,9 +51,12 @@
       LOG.info("Task " + taskid + " reporting done.");
     }
 
-    public void taskError(TaskAttemptID taskId, String message) 
-    throws IOException {
-      LOG.info("Task " + taskId + " reporting task error: " + message);
+    public void fsError(TaskAttemptID taskId, String message) throws IOException {
+      LOG.info("Task " + taskId + " reporting file system error: " + message);
+    }
+
+    public void shuffleError(TaskAttemptID taskId, String message) throws IOException {
+      LOG.info("Task " + taskId + " reporting shuffle error: " + message);
     }
 
     public JvmTask getTask(JVMId jvmId) throws IOException {

Modified: hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/LocalJobRunner.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/LocalJobRunner.java?rev=796210&r1=796209&r2=796210&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/LocalJobRunner.java (original)
+++ hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/LocalJobRunner.java Tue Jul 21 09:27:02 2009
@@ -333,9 +333,13 @@
       }
     }
 
-    public void taskError(TaskAttemptID taskId, String message) 
+    public synchronized void fsError(TaskAttemptID taskId, String message) 
     throws IOException {
-      LOG.fatal("Error: "+ message + "from task: " + taskId);
+      LOG.fatal("FSError: "+ message + "from task: " + taskId);
+    }
+
+    public void shuffleError(TaskAttemptID taskId, String message) throws IOException {
+      LOG.fatal("shuffleError: "+ message + "from task: " + taskId);
     }
     
     public MapTaskCompletionEventsUpdate getMapCompletionEvents(JobID jobId, 

Modified: hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/ReduceTask.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/ReduceTask.java?rev=796210&r1=796209&r2=796210&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/ReduceTask.java (original)
+++ hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/ReduceTask.java Tue Jul 21 09:27:02 2009
@@ -376,8 +376,8 @@
         if(reduceCopier.mergeThrowable instanceof FSError) {
           LOG.error("Task: " + getTaskID() + " - FSError: " + 
               StringUtils.stringifyException(reduceCopier.mergeThrowable));
-          umbilical.taskError(getTaskID(), 
-              "(FSError) " + reduceCopier.mergeThrowable.getMessage());
+          umbilical.fsError(getTaskID(), 
+              reduceCopier.mergeThrowable.getMessage());
         }
         throw new IOException("Task: " + getTaskID() + 
             " - The reduce copier failed", reduceCopier.mergeThrowable);
@@ -1214,8 +1214,7 @@
             LOG.error("Task: " + reduceTask.getTaskID() + " - FSError: " + 
                       StringUtils.stringifyException(e));
             try {
-              umbilical.taskError(reduceTask.getTaskID(), "(FSError) " 
-                                  + e.getMessage());
+              umbilical.fsError(reduceTask.getTaskID(), e.getMessage());
             } catch (IOException io) {
               LOG.error("Could not notify TT of FSError: " + 
                       StringUtils.stringifyException(io));
@@ -2092,9 +2091,9 @@
                   LOG.fatal("Shuffle failed with too many fetch failures " + 
                             "and insufficient progress!" +
                             "Killing task " + getTaskID() + ".");
-                  umbilical.taskError(getTaskID(), "(Shuffle Error) " 
-                                      + "Exceeded MAX_FAILED_UNIQUE_FETCHES;"
-                                      + " bailing-out.");
+                  umbilical.shuffleError(getTaskID(), 
+                                         "Exceeded MAX_FAILED_UNIQUE_FETCHES;"
+                                         + " bailing-out.");
                 }
               }
                 

Modified: hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskRunner.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskRunner.java?rev=796210&r1=796209&r2=796210&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskRunner.java (original)
+++ hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskRunner.java Tue Jul 21 09:27:02 2009
@@ -419,16 +419,12 @@
               exitCode + ".");
         }
       }
-    } catch (Error e) {
-      String error = "Error";
-      if (e instanceof FSError) {
-        error = "FSError";
-      }
-      LOG.fatal(error, e);
+    } catch (FSError e) {
+      LOG.fatal("FSError", e);
       try {
-        tracker.taskError(t.getTaskID(), e.getMessage());
+        tracker.fsError(t.getTaskID(), e.getMessage());
       } catch (IOException ie) {
-        LOG.fatal(t.getTaskID()+" reporting " + error, ie);
+        LOG.fatal(t.getTaskID()+" reporting FSError", ie);
       }
     } catch (Throwable throwable) {
       LOG.warn(t.getTaskID()+" Child Error", throwable);

Modified: hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskTracker.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskTracker.java?rev=796210&r1=796209&r2=796210&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskTracker.java (original)
+++ hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskTracker.java Tue Jul 21 09:27:02 2009
@@ -2570,13 +2570,24 @@
 
 
   /** 
+   * A reduce-task failed to shuffle the map-outputs. Kill the task.
+   */  
+  public synchronized void shuffleError(TaskAttemptID taskId, String message) 
+  throws IOException { 
+    LOG.fatal("Task: " + taskId + " - Killed due to Shuffle Failure: " + message);
+    TaskInProgress tip = runningTasks.get(taskId);
+    tip.reportDiagnosticInfo("Shuffle Error: " + message);
+    purgeTask(tip, true);
+  }
+
+  /** 
    * A child task had a local filesystem error. Kill the task.
    */  
-  public synchronized void taskError(TaskAttemptID taskId, String message) 
+  public synchronized void fsError(TaskAttemptID taskId, String message) 
   throws IOException {
-    LOG.fatal("Task: " + taskId + " - Killed due to : " + message);
+    LOG.fatal("Task: " + taskId + " - Killed due to FSError: " + message);
     TaskInProgress tip = runningTasks.get(taskId);
-    tip.reportDiagnosticInfo(message);
+    tip.reportDiagnosticInfo("FSError: " + message);
     purgeTask(tip, true);
   }
 

Modified: hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskUmbilicalProtocol.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskUmbilicalProtocol.java?rev=796210&r1=796209&r2=796210&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskUmbilicalProtocol.java (original)
+++ hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskUmbilicalProtocol.java Tue Jul 21 09:27:02 2009
@@ -53,10 +53,9 @@
    * Version 13 changed the getTask method signature for HADOOP-249
    * Version 14 changed the getTask method signature for HADOOP-4232
    * Version 15 Adds FAILED_UNCLEAN and KILLED_UNCLEAN states for HADOOP-4759
-   * Version 16 Removed fsError and shuffleError and introduced taskError.
    * */
 
-  public static final long versionID = 16L;
+  public static final long versionID = 15L;
   
   /**
    * Called when a child task process starts, to get its task.
@@ -123,8 +122,11 @@
    */
   boolean canCommit(TaskAttemptID taskid) throws IOException;
 
-  /** Report that the task encountered an error.*/
-  void taskError(TaskAttemptID taskId, String message) throws IOException;
+  /** Report that a reduce-task couldn't shuffle map-outputs.*/
+  void shuffleError(TaskAttemptID taskId, String message) throws IOException;
+  
+  /** Report that the task encountered a local filesystem error.*/
+  void fsError(TaskAttemptID taskId, String message) throws IOException;
 
   /** Called by a reduce task to get the map output locations for finished maps.
    * Returns an update centered around the map-task-completion-events. 

Modified: hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestTaskFail.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestTaskFail.java?rev=796210&r1=796209&r2=796210&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestTaskFail.java (original)
+++ hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestTaskFail.java Tue Jul 21 09:27:02 2009
@@ -49,9 +49,7 @@
         throw new IOException();
       } else if (taskid.endsWith("_1")) {
         System.exit(-1);
-      } else if (taskid.endsWith("_2")) {
-        throw new OutOfMemoryError();
-      }
+      } 
     }
   }
 
@@ -108,57 +106,46 @@
     return new JobClient(conf).submitJob(conf);
   }
   
-  private void validateAttempt(TaskInProgress tip, TaskAttemptID attemptId, 
-                               TaskStatus ts, boolean isCleanup) 
-  throws IOException {
-    assertEquals(tip.isCleanupAttempt(attemptId), isCleanup);
-    assertTrue(ts != null);
-    assertEquals(TaskStatus.State.FAILED, ts.getRunState());
-    // validate tasklogs for task attempt
-    String log = TestMiniMRMapRedDebugScript.readTaskLog(
-                      TaskLog.LogName.STDERR, attemptId, false);
-    assertTrue(log.contains(taskLog));
-    if (!isCleanup) {
-      // validate task logs: tasklog should contain both task logs
-      // and cleanup logs
-      assertTrue(log.contains(cleanupLog));
-    } else {
-      // validate tasklogs for cleanup attempt
-      log = TestMiniMRMapRedDebugScript.readTaskLog(
-                 TaskLog.LogName.STDERR, attemptId, true);
-      assertTrue(log.contains(cleanupLog));
-    }
-  }
-
   private void validateJob(RunningJob job, MiniMRCluster mr) 
   throws IOException {
     assertEquals(JobStatus.SUCCEEDED, job.getJobState());
 	    
     JobID jobId = job.getID();
     // construct the task id of first map task
-    // this should not be cleanup attempt since the first attempt 
-    // fails with an exception
     TaskAttemptID attemptId = 
       new TaskAttemptID(new TaskID(jobId, true, 0), 0);
     TaskInProgress tip = mr.getJobTrackerRunner().getJobTracker().
                             getTip(attemptId.getTaskID());
+    // this should not be cleanup attempt since the first attempt 
+    // fails with an exception
+    assertTrue(!tip.isCleanupAttempt(attemptId));
     TaskStatus ts = 
       mr.getJobTrackerRunner().getJobTracker().getTaskStatus(attemptId);
-    validateAttempt(tip, attemptId, ts, false);
+    assertTrue(ts != null);
+    assertEquals(TaskStatus.State.FAILED, ts.getRunState());
+    // validate task logs: tasklog should contain both task logs
+    // and cleanup logs
+    String log = TestMiniMRMapRedDebugScript.readTaskLog(
+                      TaskLog.LogName.STDERR, attemptId, false);
+    assertTrue(log.contains(taskLog));
+    assertTrue(log.contains(cleanupLog));
     
     attemptId =  new TaskAttemptID(new TaskID(jobId, true, 0), 1);
     // this should be cleanup attempt since the second attempt fails
     // with System.exit
-
+    assertTrue(tip.isCleanupAttempt(attemptId));
     ts = mr.getJobTrackerRunner().getJobTracker().getTaskStatus(attemptId);
+    assertTrue(ts != null);
+    assertEquals(TaskStatus.State.FAILED, ts.getRunState());
+    // validate tasklogs for task attempt
+    log = TestMiniMRMapRedDebugScript.readTaskLog(
+               TaskLog.LogName.STDERR, attemptId, false);
+    assertTrue(log.contains(taskLog));
 
-    validateAttempt(tip, attemptId, ts, true);
- 
-    attemptId =  new TaskAttemptID(new TaskID(jobId, true, 0), 2);
-    // this should be cleanup attempt since the third attempt fails
-    // with OutOfMemory
-    ts = mr.getJobTrackerRunner().getJobTracker().getTaskStatus(attemptId);
-    validateAttempt(tip, attemptId, ts, true);
+    // validate tasklogs for cleanup attempt
+    log = TestMiniMRMapRedDebugScript.readTaskLog(
+               TaskLog.LogName.STDERR, attemptId, true);
+    assertTrue(log.contains(cleanupLog));
   }
   
   public void testWithDFS() throws IOException {