You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by mc...@apache.org on 2005/06/26 20:03:17 UTC

svn commit: r201885 - /lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/JobTracker.java

Author: mc
Date: Sun Jun 26 11:03:17 2005
New Revision: 201885

URL: http://svn.apache.org/viewcvs?rev=201885&view=rev
Log:

  Abort the job if a single task fails too many times (MAX_TASK_FAILURES = 3).
  This prevents runaway processes caused by endlessly re-executing a failing task.


Modified:
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/JobTracker.java

Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/JobTracker.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/JobTracker.java?rev=201885&r1=201884&r2=201885&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/JobTracker.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/JobTracker.java Sun Jun 26 11:03:17 2005
@@ -33,6 +33,7 @@
  *******************************************************/
 public class JobTracker implements MRConstants, InterTrackerProtocol, JobSubmissionProtocol {
     static final int TRACKERINFO_PORT = 7845;
+    static final int MAX_TASK_FAILURES = 3;
 
     public static final Logger LOG = LogFormatter.getLogger("org.apache.nutch.mapred.JobTracker");
     public static JobTracker tracker = null;
@@ -475,6 +476,7 @@
         TreeMap completeMapTasks = new TreeMap();
         TreeMap incompleteReduceTasks = new TreeMap();
         TreeMap completeReduceTasks = new TreeMap();
+        TreeMap taskFailures = new TreeMap();
 
         // Info for user; useless for JobTracker
         int numMapTasks = 0;
@@ -665,6 +667,15 @@
                 completeReduceTasks.remove(taskid);
                 incompleteReduceTasks.put(taskid, t);
             }
+            
+            // Check if we need to kill the job because of excess failures
+            Integer failures = (Integer) taskFailures.get(taskid);
+            int numFailures = ((failures == null) ? 0 : failures.intValue()) + 1;
+            taskFailures.put(taskid, new Integer(numFailures));
+            if (numFailures >= MAX_TASK_FAILURES) {
+                kill();
+            }
+
             if (status.getRunState() == JobStatus.RUNNING) {
                 executeTask(taskid);
             }