You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by cu...@apache.org on 2007/05/02 22:03:57 UTC
svn commit: r534606 - in /lucene/hadoop/trunk: CHANGES.txt
conf/hadoop-default.xml src/java/org/apache/hadoop/mapred/JobConf.java
src/java/org/apache/hadoop/mapred/TaskInProgress.java
Author: cutting
Date: Wed May 2 13:03:56 2007
New Revision: 534606
URL: http://svn.apache.org/viewvc?view=rev&rev=534606
Log:
HADOOP-1304. Make configurable the maximum number of task attempts before a job fails. Contributed by Devaraj.
Modified:
lucene/hadoop/trunk/CHANGES.txt
lucene/hadoop/trunk/conf/hadoop-default.xml
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobConf.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskInProgress.java
Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?view=diff&rev=534606&r1=534605&r2=534606
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Wed May 2 13:03:56 2007
@@ -306,6 +306,9 @@
the new format. Please backup your data first before upgrading
(using 'hadoop distcp' for example). (tomwhite)
+91. HADOOP-1304. Make configurable the maximum number of task
+ attempts before a job fails. (Devaraj Das via cutting)
+
Release 0.12.3 - 2007-04-06
Modified: lucene/hadoop/trunk/conf/hadoop-default.xml
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/conf/hadoop-default.xml?view=diff&rev=534606&r1=534605&r2=534606
==============================================================================
--- lucene/hadoop/trunk/conf/hadoop-default.xml (original)
+++ lucene/hadoop/trunk/conf/hadoop-default.xml Wed May 2 13:03:56 2007
@@ -544,6 +544,24 @@
</property>
<property>
+ <name>mapred.map.max.attempts</name>
+ <value>4</value>
+ <description>Expert: The maximum number of attempts per map task.
+ In other words, the framework will try to execute a map task this many
+ times before giving up on it.
+ </description>
+</property>
+
+<property>
+ <name>mapred.reduce.max.attempts</name>
+ <value>4</value>
+ <description>Expert: The maximum number of attempts per reduce task.
+ In other words, the framework will try to execute a reduce task this many
+ times before giving up on it.
+ </description>
+</property>
+
+<property>
<name>mapred.reduce.parallel.copies</name>
<value>5</value>
<description>The default number of parallel transfers run by reduce
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobConf.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobConf.java?view=diff&rev=534606&r1=534605&r2=534606
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobConf.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobConf.java Wed May 2 13:03:56 2007
@@ -510,7 +510,41 @@
public int getNumReduceTasks() { return getInt("mapred.reduce.tasks", 1); }
public void setNumReduceTasks(int n) { setInt("mapred.reduce.tasks", n); }
+
+ /** Get the configured number of maximum attempts that will be made to run a
+ * map task, as specified by the <code>mapred.map.max.attempts</code>
+ * property. If this property is not already set, the default is 4 attempts.
+ * @return the max number of attempts
+ */
+ public int getMaxMapAttempts() {
+ return getInt("mapred.map.max.attempts", 4);
+ }
+ /** Expert: Set the number of maximum attempts that will be made to run a
+ * map task
+ * @param n the number of attempts
+ *
+ */
+ public void setMaxMapAttempts(int n) {
+ setInt("mapred.map.max.attempts", n);
+ }
+ /** Get the configured number of maximum attempts that will be made to run a
+ * reduce task, as specified by the <code>mapred.reduce.max.attempts</code>
+ * property. If this property is not already set, the default is 4 attempts.
+ * @return the max number of attempts
+ */
+ public int getMaxReduceAttempts() {
+ return getInt("mapred.reduce.max.attempts", 4);
+ }
+ /** Expert: Set the number of maximum attempts that will be made to run a
+ * reduce task
+ * @param n the number of attempts
+ *
+ */
+ public void setMaxReduceAttempts(int n) {
+ setInt("mapred.reduce.max.attempts", n);
+ }
+
/**
* Get the user-specified job name. This is only used to identify the
* job to the user.
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskInProgress.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskInProgress.java?view=diff&rev=534606&r1=534605&r2=534606
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskInProgress.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskInProgress.java Wed May 2 13:03:56 2007
@@ -49,7 +49,7 @@
////////////////////////////////////////////////////////
class TaskInProgress {
static final int MAX_TASK_EXECS = 1;
- static final int MAX_TASK_FAILURES = 4;
+ int maxTaskAttempts = 4;
static final double SPECULATIVE_GAP = 0.2;
static final long SPECULATIVE_LAG = 60 * 1000;
private static NumberFormat idFormat = NumberFormat.getInstance();
@@ -125,6 +125,7 @@
this.job = job;
this.conf = conf;
this.partition = partition;
+ setMaxTaskAttempts();
init(uniqueString);
}
@@ -141,8 +142,19 @@
this.jobtracker = jobtracker;
this.job = job;
this.conf = conf;
+ setMaxTaskAttempts();
init(uniqueString);
}
+ /**
+ * Set the max number of attempts before we declare a TIP as "failed"
+ */
+ private void setMaxTaskAttempts() {
+ if (isMapTask()) {
+ this.maxTaskAttempts = conf.getMaxMapAttempts();
+ } else {
+ this.maxTaskAttempts = conf.getMaxReduceAttempts();
+ }
+ }
/**
* Make a unique name for this TIP.
@@ -430,7 +442,7 @@
numKilledTasks++;
}
- if (numTaskFailures >= MAX_TASK_FAILURES) {
+ if (numTaskFailures >= maxTaskAttempts) {
LOG.info("TaskInProgress " + getTIPId() + " has failed " + numTaskFailures + " times.");
kill();
}
@@ -620,11 +632,11 @@
// Create the 'taskid'; do not count the 'killed' tasks against the job!
String taskid = null;
- if (nextTaskId < (MAX_TASK_EXECS + MAX_TASK_FAILURES + numKilledTasks)) {
+ if (nextTaskId < (MAX_TASK_EXECS + maxTaskAttempts + numKilledTasks)) {
taskid = new String("task_" + taskIdPrefix + "_" + nextTaskId);
++nextTaskId;
} else {
- LOG.warn("Exceeded limit of " + (MAX_TASK_EXECS + MAX_TASK_FAILURES) +
+ LOG.warn("Exceeded limit of " + (MAX_TASK_EXECS + maxTaskAttempts) +
" (plus " + numKilledTasks + " killed)" +
" attempts for the tip '" + getTIPId() + "'");
return null;