You are viewing a plain text version of this content. The canonical link for it is here.
Posted to yarn-commits@hadoop.apache.org by je...@apache.org on 2013/03/06 00:33:21 UTC

svn commit: r1453092 - in /hadoop/common/branches/branch-0.23/hadoop-yarn-project: ./ hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/ hadoop-yarn/hadoop-yarn-s...

Author: jeagles
Date: Tue Mar  5 23:33:20 2013
New Revision: 1453092

URL: http://svn.apache.org/r1453092
Log:
YARN-227. Application expiration difficult to debug for end-users (Jason Lowe via jeagles)

Modified:
    hadoop/common/branches/branch-0.23/hadoop-yarn-project/CHANGES.txt
    hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
    hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java

Modified: hadoop/common/branches/branch-0.23/hadoop-yarn-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-yarn-project/CHANGES.txt?rev=1453092&r1=1453091&r2=1453092&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-yarn-project/CHANGES.txt (original)
+++ hadoop/common/branches/branch-0.23/hadoop-yarn-project/CHANGES.txt Tue Mar  5 23:33:20 2013
@@ -22,6 +22,9 @@ Release 0.23.7 - UNRELEASED
     YARN-269. Resource Manager not logging the health_check_script result when
     taking it out (Jason Lowe via kihwal)
 
+    YARN-227. Application expiration difficult to debug for end-users
+    (Jason Lowe via jeagles)
+
   OPTIMIZATIONS
 
     YARN-357. App submission should not be synchronized (daryn)

Modified: hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java?rev=1453092&r1=1453091&r2=1453092&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java (original)
+++ hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java Tue Mar  5 23:33:20 2013
@@ -134,6 +134,9 @@ public class RMAppAttemptImpl implements
 
   private Configuration conf;
 
+  private static final ExpiredTransition EXPIRED_TRANSITION =
+      new ExpiredTransition();
+
   private static final StateMachineFactory<RMAppAttemptImpl,
                                            RMAppAttemptState,
                                            RMAppAttemptEventType,
@@ -189,7 +192,7 @@ public class RMAppAttemptImpl implements
       .addTransition(
           RMAppAttemptState.LAUNCHED, RMAppAttemptState.FAILED,
           RMAppAttemptEventType.EXPIRE,
-          new FinalTransition(RMAppAttemptState.FAILED))
+          EXPIRED_TRANSITION)
       .addTransition(RMAppAttemptState.LAUNCHED, RMAppAttemptState.KILLED,
           RMAppAttemptEventType.KILL,
           new FinalTransition(RMAppAttemptState.KILLED))
@@ -213,7 +216,7 @@ public class RMAppAttemptImpl implements
       .addTransition(
           RMAppAttemptState.RUNNING, RMAppAttemptState.FAILED,
           RMAppAttemptEventType.EXPIRE,
-          new FinalTransition(RMAppAttemptState.FAILED))
+          EXPIRED_TRANSITION)
       .addTransition(
           RMAppAttemptState.RUNNING, RMAppAttemptState.KILLED,
           RMAppAttemptEventType.KILL,
@@ -391,6 +394,13 @@ public class RMAppAttemptImpl implements
     }
   }
 
+  private void setTrackingUrlToRMAppPage() {
+    origTrackingUrl = pjoin(
+        YarnConfiguration.getRMWebAppHostAndPort(conf),
+        "cluster", "app", getAppAttemptId().getApplicationId());
+    proxiedTrackingUrl = origTrackingUrl;
+  }
+
   @Override
   public String getClientToken() {
     return this.clientToken;
@@ -828,6 +838,22 @@ public class RMAppAttemptImpl implements
     }
   }
 
+  private static class ExpiredTransition extends FinalTransition {
+
+    public ExpiredTransition() {
+      super(RMAppAttemptState.FAILED);
+    }
+
+    @Override
+    public void transition(RMAppAttemptImpl appAttempt,
+        RMAppAttemptEvent event) {
+      appAttempt.diagnostics.append("ApplicationMaster for attempt " +
+        appAttempt.getAppAttemptId() + " timed out");
+      appAttempt.setTrackingUrlToRMAppPage();
+      super.transition(appAttempt, event);
+    }
+  }
+
   private static final class StatusUpdateTransition extends
       BaseTransition {
     @Override
@@ -907,10 +933,7 @@ public class RMAppAttemptImpl implements
         // When the AM dies, the trackingUrl is left pointing to the AM's URL,
         // which shows up in the scheduler UI as a broken link.  Direct the
         // user to the app page on the RM so they can see the status and logs.
-        appAttempt.origTrackingUrl = pjoin(
-            YarnConfiguration.getRMWebAppHostAndPort(appAttempt.conf),
-            "cluster", "app", appAttempt.getAppAttemptId().getApplicationId());
-        appAttempt.proxiedTrackingUrl = appAttempt.origTrackingUrl;
+        appAttempt.setTrackingUrlToRMAppPage();
 
         new FinalTransition(RMAppAttemptState.FAILED).transition(
             appAttempt, containerFinishedEvent);

Modified: hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java?rev=1453092&r1=1453091&r2=1453092&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java (original)
+++ hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java Tue Mar  5 23:33:20 2013
@@ -22,6 +22,7 @@ import static org.junit.Assert.assertEqu
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
 import static org.mockito.Matchers.any;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.times;
@@ -537,6 +538,39 @@ public class TestRMAppAttemptTransitions
     assertEquals(rmAppPageUrl, applicationAttempt.getTrackingUrl());
   }
 
+  @Test(timeout=10000)
+  public void testLaunchedExpire() {
+    Container amContainer = allocateApplicationAttempt();
+    launchApplicationAttempt(amContainer);
+    applicationAttempt.handle(new RMAppAttemptEvent(
+        applicationAttempt.getAppAttemptId(), RMAppAttemptEventType.EXPIRE));
+    assertEquals(RMAppAttemptState.FAILED,
+        applicationAttempt.getAppAttemptState());
+    assertTrue("expire diagnostics missing",
+        applicationAttempt.getDiagnostics().contains("timed out"));
+    String rmAppPageUrl = pjoin(RM_WEBAPP_ADDR, "cluster", "app",
+        applicationAttempt.getAppAttemptId().getApplicationId());
+    assertEquals(rmAppPageUrl, applicationAttempt.getOriginalTrackingUrl());
+    assertEquals(rmAppPageUrl, applicationAttempt.getTrackingUrl());
+  }
+
+  @Test(timeout=20000)
+  public void testRunningExpire() {
+    Container amContainer = allocateApplicationAttempt();
+    launchApplicationAttempt(amContainer);
+    runApplicationAttempt(amContainer, "host", 8042, "oldtrackingurl");
+    applicationAttempt.handle(new RMAppAttemptEvent(
+        applicationAttempt.getAppAttemptId(), RMAppAttemptEventType.EXPIRE));
+    assertEquals(RMAppAttemptState.FAILED,
+        applicationAttempt.getAppAttemptState());
+    assertTrue("expire diagnostics missing",
+        applicationAttempt.getDiagnostics().contains("timed out"));
+    String rmAppPageUrl = pjoin(RM_WEBAPP_ADDR, "cluster", "app",
+        applicationAttempt.getAppAttemptId().getApplicationId());
+    assertEquals(rmAppPageUrl, applicationAttempt.getOriginalTrackingUrl());
+    assertEquals(rmAppPageUrl, applicationAttempt.getTrackingUrl());
+  }
+
   @Test 
   public void testUnregisterToKilledFinish() {
     Container amContainer = allocateApplicationAttempt();