You are viewing a plain text version of this content. The canonical link for it is here.
Posted to yarn-commits@hadoop.apache.org by vi...@apache.org on 2013/06/02 00:02:44 UTC

svn commit: r1488619 - in /hadoop/common/branches/branch-2/hadoop-yarn-project: ./ hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/ hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/

Author: vinodkv
Date: Sat Jun  1 22:02:44 2013
New Revision: 1488619

URL: http://svn.apache.org/r1488619
Log:
YARN-733. Fixed TestNMClient from failing occasionally. Contributed by Zhijie Shen.
svn merge --ignore-ancestry -c 1488618 ../../trunk/

Modified:
    hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt
    hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/NMClientImpl.java
    hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestNMClient.java

Modified: hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt?rev=1488619&r1=1488618&r2=1488619&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt (original)
+++ hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt Sat Jun  1 22:02:44 2013
@@ -377,6 +377,9 @@ Release 2.1.0-beta - UNRELEASED
     YARN-578. Fixed NM to use SecureIOUtils for reading and aggregating logs.
     (Omkar Vinit Joshi via vinodkv) 
 
+    YARN-733. Fixed TestNMClient from failing occasionally. (Zhijie Shen via
+    vinodkv)
+
   BREAKDOWN OF HADOOP-8562 SUBTASKS AND RELATED JIRAS
 
     YARN-158. Yarn creating package-info.java must not depend on sh.

Modified: hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/NMClientImpl.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/NMClientImpl.java?rev=1488619&r1=1488618&r2=1488619&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/NMClientImpl.java (original)
+++ hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/NMClientImpl.java Sat Jun  1 22:02:44 2013
@@ -64,6 +64,17 @@ import org.apache.hadoop.yarn.util.Recor
  * continue to run even after this client is stopped and till the application
  * runs at which point ResourceManager will forcefully kill them.
  * </p>
+ *
+ * <p>
+ * Note that the blocking APIs ensure the RPC calls to <code>NodeManager</code>
+ * are executed immediately, and the responses are received before these APIs
+ * return. However, when {@link #startContainer} or {@link #stopContainer}
+ * returns, <code>NodeManager</code> may still need some time to either start
+ * or stop the container because of its asynchronous implementation. Therefore,
+ * {@link #getContainerStatus} is likely to return a transitional container status
+ * if it is executed immediately after {@link #startContainer} or
+ * {@link #stopContainer}.
+ * </p>
  */
 public class NMClientImpl extends AbstractService implements NMClient {
 

Modified: hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestNMClient.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestNMClient.java?rev=1488619&r1=1488618&r2=1488619&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestNMClient.java (original)
+++ hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestNMClient.java Sat Jun  1 22:02:44 2013
@@ -20,8 +20,8 @@ package org.apache.hadoop.yarn.client;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
 import java.io.IOException;
@@ -228,7 +228,7 @@ public class TestNMClient {
   }
 
   private void testContainerManagement(NMClientImpl nmClient,
-      Set<Container> containers) throws IOException {
+      Set<Container> containers) throws YarnRemoteException, IOException {
     int size = containers.size();
     int i = 0;
     for (Container container : containers) {
@@ -271,17 +271,9 @@ public class TestNMClient {
 
       // leave one container unclosed
       if (++i < size) {
-        try {
-          ContainerStatus status = nmClient.getContainerStatus(container.getId(),
-              container.getNodeId(), container.getContainerToken());
-          // verify the container is started and in good shape
-          assertEquals(container.getId(), status.getContainerId());
-          assertEquals(ContainerState.RUNNING, status.getState());
-          assertEquals("", status.getDiagnostics());
-          assertEquals(-1000, status.getExitStatus());
-        } catch (YarnRemoteException e) {
-          fail("Exception is not expected");
-        }
+        // NodeManager may still need some time to get the container started
+        testGetContainerStatus(container, i, ContainerState.RUNNING, "",
+            -1000);
 
         try {
           nmClient.stopContainer(container.getId(), container.getNodeId(),
@@ -291,18 +283,8 @@ public class TestNMClient {
         }
 
         // getContainerStatus can be called after stopContainer
-        try {
-          ContainerStatus status = nmClient.getContainerStatus(
-              container.getId(), container.getNodeId(),
-              container.getContainerToken());
-          assertEquals(container.getId(), status.getContainerId());
-          assertEquals(ContainerState.RUNNING, status.getState());
-          assertTrue("" + i, status.getDiagnostics().contains(
-              "Container killed by the ApplicationMaster."));
-          assertEquals(-1000, status.getExitStatus());
-        } catch (YarnRemoteException e) {
-          fail("Exception is not expected");
-        }
+        testGetContainerStatus(container, i, ContainerState.COMPLETE,
+            "Container killed by the ApplicationMaster.", 143);
       }
     }
   }
@@ -315,4 +297,28 @@ public class TestNMClient {
     }
   }
 
+  private void testGetContainerStatus(Container container, int index,
+      ContainerState state, String diagnostics, int exitStatus)
+          throws YarnRemoteException, IOException {
+    while (true) {
+      try {
+        ContainerStatus status = nmClient.getContainerStatus(
+            container.getId(), container.getNodeId(),
+                container.getContainerToken());
+        // NodeManager may still need some time to get the stable
+        // container status
+        if (status.getState() == state) {
+          assertEquals(container.getId(), status.getContainerId());
+          assertTrue("" + index + ": " + status.getDiagnostics(),
+              status.getDiagnostics().contains(diagnostics));
+          assertEquals(exitStatus, status.getExitStatus());
+          break;
+        }
+        Thread.sleep(100);
+      } catch (InterruptedException e) {
+        e.printStackTrace();
+      }
+    }
+  }
+
 }