You are viewing a plain text version of this content. The canonical link for it is here.
Posted to yarn-commits@hadoop.apache.org by vi...@apache.org on 2014/02/24 23:41:25 UTC

svn commit: r1571474 - in /hadoop/common/trunk/hadoop-yarn-project: ./ hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/...

Author: vinodkv
Date: Mon Feb 24 22:41:24 2014
New Revision: 1571474

URL: http://svn.apache.org/r1571474
Log:
YARN-1686. Fixed NodeManager to properly handle any errors during re-registration after a RESYNC and thus avoid hanging. Contributed by Rohith Sharma.

Modified:
    hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt
    hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
    hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java

Modified: hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt?rev=1571474&r1=1571473&r2=1571474&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt (original)
+++ hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt Mon Feb 24 22:41:24 2014
@@ -344,6 +344,10 @@ Release 2.4.0 - UNRELEASED
     YARN-1742. Fixed javadoc of configuration parameter
     DEFAULT_NM_MIN_HEALTHY_DISKS_FRACTION. (Akira Ajisaka via vinodkv)
 
+    YARN-1686. Fixed NodeManager to properly handle any errors during
+    re-registration after a RESYNC and thus avoid hanging. (Rohith Sharma via
+    vinodkv)
+
 Release 2.3.1 - UNRELEASED
 
   INCOMPATIBLE CHANGES

Modified: hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java?rev=1571474&r1=1571473&r2=1571474&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java (original)
+++ hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java Mon Feb 24 22:41:24 2014
@@ -224,11 +224,16 @@ public class NodeManager extends Composi
     new Thread() {
       @Override
       public void run() {
-        LOG.info("Notifying ContainerManager to block new container-requests");
-        containerManager.setBlockNewContainerRequests(true);
-        LOG.info("Cleaning up running containers on resync");
-        containerManager.cleanupContainersOnNMResync();
-        ((NodeStatusUpdaterImpl) nodeStatusUpdater ).rebootNodeStatusUpdater();
+        try {
+          LOG.info("Notifying ContainerManager to block new container-requests");
+          containerManager.setBlockNewContainerRequests(true);
+          LOG.info("Cleaning up running containers on resync");
+          containerManager.cleanupContainersOnNMResync();
+          ((NodeStatusUpdaterImpl) nodeStatusUpdater).rebootNodeStatusUpdater();
+        } catch (YarnRuntimeException e) {
+          LOG.fatal("Error while rebooting NodeStatusUpdater.", e);
+          shutDown();
+        }
       }
     }.start();
   }

Modified: hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java?rev=1571474&r1=1571473&r2=1571474&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java (original)
+++ hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java Mon Feb 24 22:41:24 2014
@@ -40,6 +40,7 @@ import org.apache.hadoop.yarn.conf.YarnC
 import org.apache.hadoop.yarn.event.Dispatcher;
 import org.apache.hadoop.yarn.exceptions.NMNotYetReadyException;
 import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
 import org.apache.hadoop.yarn.factories.RecordFactory;
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
@@ -65,6 +66,7 @@ public class TestNodeManagerResync {
   private FileContext localFS;
   private CyclicBarrier syncBarrier;
   private AtomicBoolean assertionFailedInThread = new AtomicBoolean(false);
+  private AtomicBoolean isNMShutdownCalled = new AtomicBoolean(false);
 
   @Before
   public void setup() throws UnsupportedFileSystemException {
@@ -137,6 +139,30 @@ public class TestNodeManagerResync {
     Assert.assertFalse(assertionFailedInThread.get());
     nm.stop();
   }
+  
+  @SuppressWarnings("unchecked")
+  @Test(timeout=10000)
+  public void testNMshutdownWhenResyncThrowException() throws IOException,
+      InterruptedException, YarnException {
+    NodeManager nm = new TestNodeManager3();
+    YarnConfiguration conf = createNMConfig();
+    nm.init(conf);
+    nm.start();
+    Assert.assertEquals(1, ((TestNodeManager3) nm).getNMRegistrationCount());
+    nm.getNMDispatcher().getEventHandler()
+        .handle(new NodeManagerEvent(NodeManagerEventType.RESYNC));
+    
+    synchronized (isNMShutdownCalled) {
+      while (isNMShutdownCalled.get() == false) {
+        try {
+          isNMShutdownCalled.wait();
+        } catch (InterruptedException e) {
+        }
+      }
+    }
+    
+    Assert.assertTrue("NM shutdown not called.",isNMShutdownCalled.get());
+  }
 
   private YarnConfiguration createNMConfig() {
     YarnConfiguration conf = new YarnConfiguration();
@@ -322,4 +348,44 @@ public class TestNodeManagerResync {
       }
     }
   }
+  
+  class TestNodeManager3 extends NodeManager {
+
+    private int registrationCount = 0;
+
+    @Override
+    protected NodeStatusUpdater createNodeStatusUpdater(Context context,
+        Dispatcher dispatcher, NodeHealthCheckerService healthChecker) {
+      return new TestNodeStatusUpdaterImpl3(context, dispatcher, healthChecker,
+          metrics);
+    }
+
+    public int getNMRegistrationCount() {
+      return registrationCount;
+    }
+
+    @Override
+    protected void shutDown() {
+      synchronized (isNMShutdownCalled) {
+        isNMShutdownCalled.set(true);
+        isNMShutdownCalled.notify();
+      }
+    }
+
+    class TestNodeStatusUpdaterImpl3 extends MockNodeStatusUpdater {
+
+      public TestNodeStatusUpdaterImpl3(Context context, Dispatcher dispatcher,
+          NodeHealthCheckerService healthChecker, NodeManagerMetrics metrics) {
+        super(context, dispatcher, healthChecker, metrics);
+      }
+
+      @Override
+      protected void registerWithRM() throws YarnException, IOException {
+        super.registerWithRM();
+        registrationCount++;
+        if (registrationCount > 1) {
+          throw new YarnRuntimeException("Registration with RM failed.");
+        }
+      }
+    }}
 }