You are viewing a plain text version of this content. The canonical link for it is here.
Posted to yarn-commits@hadoop.apache.org by je...@apache.org on 2013/03/06 00:33:21 UTC
svn commit: r1453092 - in
/hadoop/common/branches/branch-0.23/hadoop-yarn-project: ./
hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/
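hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/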
Author: jeagles
Date: Tue Mar 5 23:33:20 2013
New Revision: 1453092
URL: http://svn.apache.org/r1453092
Log:
YARN-227. Application expiration difficult to debug for end-users (Jason Lowe via jeagles)
Modified:
hadoop/common/branches/branch-0.23/hadoop-yarn-project/CHANGES.txt
hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java
Modified: hadoop/common/branches/branch-0.23/hadoop-yarn-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-yarn-project/CHANGES.txt?rev=1453092&r1=1453091&r2=1453092&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-yarn-project/CHANGES.txt (original)
+++ hadoop/common/branches/branch-0.23/hadoop-yarn-project/CHANGES.txt Tue Mar 5 23:33:20 2013
@@ -22,6 +22,9 @@ Release 0.23.7 - UNRELEASED
YARN-269. Resource Manager not logging the health_check_script result when
taking it out (Jason Lowe via kihwal)
+ YARN-227. Application expiration difficult to debug for end-users
+ (Jason Lowe via jeagles)
+
OPTIMIZATIONS
YARN-357. App submission should not be synchronized (daryn)
Modified: hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java?rev=1453092&r1=1453091&r2=1453092&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java (original)
+++ hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java Tue Mar 5 23:33:20 2013
@@ -134,6 +134,9 @@ public class RMAppAttemptImpl implements
private Configuration conf;
+ private static final ExpiredTransition EXPIRED_TRANSITION =
+ new ExpiredTransition();
+
private static final StateMachineFactory<RMAppAttemptImpl,
RMAppAttemptState,
RMAppAttemptEventType,
@@ -189,7 +192,7 @@ public class RMAppAttemptImpl implements
.addTransition(
RMAppAttemptState.LAUNCHED, RMAppAttemptState.FAILED,
RMAppAttemptEventType.EXPIRE,
- new FinalTransition(RMAppAttemptState.FAILED))
+ EXPIRED_TRANSITION)
.addTransition(RMAppAttemptState.LAUNCHED, RMAppAttemptState.KILLED,
RMAppAttemptEventType.KILL,
new FinalTransition(RMAppAttemptState.KILLED))
@@ -213,7 +216,7 @@ public class RMAppAttemptImpl implements
.addTransition(
RMAppAttemptState.RUNNING, RMAppAttemptState.FAILED,
RMAppAttemptEventType.EXPIRE,
- new FinalTransition(RMAppAttemptState.FAILED))
+ EXPIRED_TRANSITION)
.addTransition(
RMAppAttemptState.RUNNING, RMAppAttemptState.KILLED,
RMAppAttemptEventType.KILL,
@@ -391,6 +394,13 @@ public class RMAppAttemptImpl implements
}
}
+ private void setTrackingUrlToRMAppPage() {
+ origTrackingUrl = pjoin(
+ YarnConfiguration.getRMWebAppHostAndPort(conf),
+ "cluster", "app", getAppAttemptId().getApplicationId());
+ proxiedTrackingUrl = origTrackingUrl;
+ }
+
@Override
public String getClientToken() {
return this.clientToken;
@@ -828,6 +838,22 @@ public class RMAppAttemptImpl implements
}
}
+ private static class ExpiredTransition extends FinalTransition {
+
+ public ExpiredTransition() {
+ super(RMAppAttemptState.FAILED);
+ }
+
+ @Override
+ public void transition(RMAppAttemptImpl appAttempt,
+ RMAppAttemptEvent event) {
+ appAttempt.diagnostics.append("ApplicationMaster for attempt " +
+ appAttempt.getAppAttemptId() + " timed out");
+ appAttempt.setTrackingUrlToRMAppPage();
+ super.transition(appAttempt, event);
+ }
+ }
+
private static final class StatusUpdateTransition extends
BaseTransition {
@Override
@@ -907,10 +933,7 @@ public class RMAppAttemptImpl implements
// When the AM dies, the trackingUrl is left pointing to the AM's URL,
// which shows up in the scheduler UI as a broken link. Direct the
// user to the app page on the RM so they can see the status and logs.
- appAttempt.origTrackingUrl = pjoin(
- YarnConfiguration.getRMWebAppHostAndPort(appAttempt.conf),
- "cluster", "app", appAttempt.getAppAttemptId().getApplicationId());
- appAttempt.proxiedTrackingUrl = appAttempt.origTrackingUrl;
+ appAttempt.setTrackingUrlToRMAppPage();
new FinalTransition(RMAppAttemptState.FAILED).transition(
appAttempt, containerFinishedEvent);
Modified: hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java?rev=1453092&r1=1453091&r2=1453092&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java (original)
+++ hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java Tue Mar 5 23:33:20 2013
@@ -22,6 +22,7 @@ import static org.junit.Assert.assertEqu
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
import static org.mockito.Matchers.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
@@ -537,6 +538,39 @@ public class TestRMAppAttemptTransitions
assertEquals(rmAppPageUrl, applicationAttempt.getTrackingUrl());
}
+ @Test(timeout=10000)
+ public void testLaunchedExpire() {
+ Container amContainer = allocateApplicationAttempt();
+ launchApplicationAttempt(amContainer);
+ applicationAttempt.handle(new RMAppAttemptEvent(
+ applicationAttempt.getAppAttemptId(), RMAppAttemptEventType.EXPIRE));
+ assertEquals(RMAppAttemptState.FAILED,
+ applicationAttempt.getAppAttemptState());
+ assertTrue("expire diagnostics missing",
+ applicationAttempt.getDiagnostics().contains("timed out"));
+ String rmAppPageUrl = pjoin(RM_WEBAPP_ADDR, "cluster", "app",
+ applicationAttempt.getAppAttemptId().getApplicationId());
+ assertEquals(rmAppPageUrl, applicationAttempt.getOriginalTrackingUrl());
+ assertEquals(rmAppPageUrl, applicationAttempt.getTrackingUrl());
+ }
+
+ @Test(timeout=20000)
+ public void testRunningExpire() {
+ Container amContainer = allocateApplicationAttempt();
+ launchApplicationAttempt(amContainer);
+ runApplicationAttempt(amContainer, "host", 8042, "oldtrackingurl");
+ applicationAttempt.handle(new RMAppAttemptEvent(
+ applicationAttempt.getAppAttemptId(), RMAppAttemptEventType.EXPIRE));
+ assertEquals(RMAppAttemptState.FAILED,
+ applicationAttempt.getAppAttemptState());
+ assertTrue("expire diagnostics missing",
+ applicationAttempt.getDiagnostics().contains("timed out"));
+ String rmAppPageUrl = pjoin(RM_WEBAPP_ADDR, "cluster", "app",
+ applicationAttempt.getAppAttemptId().getApplicationId());
+ assertEquals(rmAppPageUrl, applicationAttempt.getOriginalTrackingUrl());
+ assertEquals(rmAppPageUrl, applicationAttempt.getTrackingUrl());
+ }
+
@Test
public void testUnregisterToKilledFinish() {
Container amContainer = allocateApplicationAttempt();