You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by ma...@apache.org on 2011/05/18 03:04:31 UTC
svn commit: r1104647 - in /hadoop/mapreduce/branches/MR-279: ./
yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/
yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org...
Author: mahadev
Date: Wed May 18 01:04:31 2011
New Revision: 1104647
URL: http://svn.apache.org/viewvc?rev=1104647&view=rev
Log:
Fix job hang if the AM launch fails. (mahadev)
Modified:
hadoop/mapreduce/branches/MR-279/CHANGES.txt
hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/AMLauncher.java
hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/ApplicationMasterInfo.java
hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/SchedulerNegotiator.java
hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/events/ApplicationMasterEvents.java
Modified: hadoop/mapreduce/branches/MR-279/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/CHANGES.txt?rev=1104647&r1=1104646&r2=1104647&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/CHANGES.txt (original)
+++ hadoop/mapreduce/branches/MR-279/CHANGES.txt Wed May 18 01:04:31 2011
@@ -3,6 +3,8 @@ Hadoop MapReduce Change Log
Trunk (unreleased changes)
MAPREDUCE-279
+
+ Fix job hang if the AM launch fails. (mahadev)
MAPREDUCE-2504. race in JobHistoryEventHandler stop (siddharth seth via mahadev)
Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/AMLauncher.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/AMLauncher.java?rev=1104647&r1=1104646&r2=1104647&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/AMLauncher.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/AMLauncher.java Wed May 18 01:04:31 2011
@@ -256,14 +256,15 @@ public class AMLauncher implements Runna
public void run() {
switch (event) {
case LAUNCH:
+ ApplicationEventType eventType = ApplicationEventType.LAUNCHED;
try {
LOG.info("Launching master" + master.getMaster());
launch();
- } catch(IOException ie) {
+ } catch(Exception ie) {
LOG.info("Error launching ", ie);
- handler.handle(new ASMEvent<ApplicationEventType>(ApplicationEventType.FAILED, master));
+ eventType = ApplicationEventType.LAUNCH_FAILED;
}
- handler.handle(new ASMEvent<ApplicationEventType>(ApplicationEventType.LAUNCHED, master));
+ handler.handle(new ASMEvent<ApplicationEventType>(eventType, master));
break;
case CLEANUP:
try {
Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/ApplicationMasterInfo.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/ApplicationMasterInfo.java?rev=1104647&r1=1104646&r2=1104647&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/ApplicationMasterInfo.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/ApplicationMasterInfo.java Wed May 18 01:04:31 2011
@@ -80,6 +80,7 @@ public class ApplicationMasterInfo imple
private final AllocateTransition allocateTransition = new AllocateTransition();
private final LaunchTransition launchTransition = new LaunchTransition();
private final LaunchedTransition launchedTransition = new LaunchedTransition();
+ private final FailedLaunchTransition failedLaunchTransition = new FailedLaunchTransition();
private final StateMachine<ApplicationState, ApplicationEventType,
ASMEvent<ApplicationEventType>> stateMachine;
@@ -133,6 +134,12 @@ public class ApplicationMasterInfo imple
.addTransition(ApplicationState.LAUNCHING, ApplicationState.LAUNCHED,
ApplicationEventType.LAUNCHED, launchedTransition)
+ .addTransition(ApplicationState.LAUNCHING, ApplicationState.PENDING,
+ ApplicationEventType.LAUNCH_FAILED, failedLaunchTransition)
+
+ .addTransition(ApplicationState.PENDING, ApplicationState.ALLOCATING,
+ ApplicationEventType.RELEASED, new ScheduleTransition())
+
/** we cant say if the application was launched or not on a recovery, so for now
* we assume it was launched and wait for its restart.
*/
@@ -326,6 +333,16 @@ public class ApplicationMasterInfo imple
}
}
+ private static class FailedLaunchTransition implements
+ SingleArcTransition<ApplicationMasterInfo, ASMEvent<ApplicationEventType>> {
+ @Override
+ public void transition(ApplicationMasterInfo masterInfo,
+ ASMEvent<ApplicationEventType> event) {
+ masterInfo.handler.handle(new ASMEvent<SNEventType>(
+ SNEventType.RELEASE, masterInfo));
+ }
+ }
+
private static class LaunchTransition implements
SingleArcTransition<ApplicationMasterInfo, ASMEvent<ApplicationEventType>> {
@Override
@@ -389,6 +406,19 @@ public class ApplicationMasterInfo imple
}
+ /* Transition to schedule again on a container launch failure for AM */
+ private static class ScheduleTransition implements
+ SingleArcTransition<ApplicationMasterInfo, ASMEvent<ApplicationEventType>> {
+ @Override
+ public void transition(ApplicationMasterInfo masterInfo,
+ ASMEvent<ApplicationEventType> event) {
+ masterInfo.masterContainer = null;
+ /* schedule for a slot */
+ masterInfo.handler.handle(new ASMEvent<SNEventType>(SNEventType.SCHEDULE,
+ masterInfo));
+ }
+ }
+
/* Transition to start the process of allocating for the AM container */
private static class AllocateTransition implements
SingleArcTransition<ApplicationMasterInfo, ASMEvent<ApplicationEventType>> {
Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/SchedulerNegotiator.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/SchedulerNegotiator.java?rev=1104647&r1=1104646&r2=1104647&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/SchedulerNegotiator.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/SchedulerNegotiator.java Wed May 18 01:04:31 2011
@@ -202,6 +202,17 @@ class SchedulerNegotiator extends Abstra
case SCHEDULE:
addPending(appContext);
break;
+ case RELEASE:
+ try {
+ scheduler.allocate(appContext.getApplicationID(),
+ EMPTY_ASK, Collections.singletonList(appContext.getMasterContainer()));
+ } catch(IOException ie) {
+ //TODO remove IOException from the scheduler.
+ LOG.error("Error while releasing container for AM " + appContext.getApplicationID());
+ }
+ handler.handle(new ASMEvent<ApplicationEventType>(ApplicationEventType.RELEASED,
+ appContext));
+ break;
case CLEANUP:
try {
finishApplication(appContext);
Modified: hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/events/ApplicationMasterEvents.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/events/ApplicationMasterEvents.java?rev=1104647&r1=1104646&r2=1104647&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/events/ApplicationMasterEvents.java (original)
+++ hadoop/mapreduce/branches/MR-279/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/events/ApplicationMasterEvents.java Wed May 18 01:04:31 2011
@@ -30,6 +30,7 @@ import org.apache.hadoop.classification.
public class ApplicationMasterEvents {
public enum SNEventType {
SCHEDULE,
+ RELEASE,
CLEANUP
};
@@ -48,10 +49,12 @@ public class ApplicationMasterEvents {
public enum ApplicationEventType {
ALLOCATE,
REGISTERED,
+ RELEASED,
RECOVER,
REMOVE,
STATUSUPDATE,
LAUNCH,
+ LAUNCH_FAILED,
LAUNCHED,
FAILED,
FAILED_MAX_RETRIES,