You are viewing a plain text version of this content. The canonical link for it is here.
Posted to yarn-commits@hadoop.apache.org by vi...@apache.org on 2014/02/24 23:42:14 UTC
svn commit: r1571476 - in
/hadoop/common/branches/branch-2.4/hadoop-yarn-project: ./
hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/
hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/
Author: vinodkv
Date: Mon Feb 24 22:42:14 2014
New Revision: 1571476
URL: http://svn.apache.org/r1571476
Log:
YARN-1686. Fixed NodeManager to properly handle any errors during re-registration after a RESYNC and thus avoid hanging. Contributed by Rohith Sharma.
svn merge --ignore-ancestry -c 1571474 ../../trunk/
Modified:
hadoop/common/branches/branch-2.4/hadoop-yarn-project/CHANGES.txt
hadoop/common/branches/branch-2.4/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
hadoop/common/branches/branch-2.4/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java
Modified: hadoop/common/branches/branch-2.4/hadoop-yarn-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.4/hadoop-yarn-project/CHANGES.txt?rev=1571476&r1=1571475&r2=1571476&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.4/hadoop-yarn-project/CHANGES.txt (original)
+++ hadoop/common/branches/branch-2.4/hadoop-yarn-project/CHANGES.txt Mon Feb 24 22:42:14 2014
@@ -307,6 +307,10 @@ Release 2.4.0 - UNRELEASED
YARN-1742. Fixed javadoc of configuration parameter
DEFAULT_NM_MIN_HEALTHY_DISKS_FRACTION. (Akira Ajisaka via vinodkv)
+ YARN-1686. Fixed NodeManager to properly handle any errors during
+ re-registration after a RESYNC and thus avoid hanging. (Rohith Sharma via
+ vinodkv)
+
Release 2.3.1 - UNRELEASED
INCOMPATIBLE CHANGES
Modified: hadoop/common/branches/branch-2.4/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.4/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java?rev=1571476&r1=1571475&r2=1571476&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.4/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java (original)
+++ hadoop/common/branches/branch-2.4/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java Mon Feb 24 22:42:14 2014
@@ -224,11 +224,16 @@ public class NodeManager extends Composi
new Thread() {
@Override
public void run() {
- LOG.info("Notifying ContainerManager to block new container-requests");
- containerManager.setBlockNewContainerRequests(true);
- LOG.info("Cleaning up running containers on resync");
- containerManager.cleanupContainersOnNMResync();
- ((NodeStatusUpdaterImpl) nodeStatusUpdater ).rebootNodeStatusUpdater();
+ try {
+ LOG.info("Notifying ContainerManager to block new container-requests");
+ containerManager.setBlockNewContainerRequests(true);
+ LOG.info("Cleaning up running containers on resync");
+ containerManager.cleanupContainersOnNMResync();
+ ((NodeStatusUpdaterImpl) nodeStatusUpdater).rebootNodeStatusUpdater();
+ } catch (YarnRuntimeException e) {
+ LOG.fatal("Error while rebooting NodeStatusUpdater.", e);
+ shutDown();
+ }
}
}.start();
}
Modified: hadoop/common/branches/branch-2.4/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.4/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java?rev=1571476&r1=1571475&r2=1571476&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.4/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java (original)
+++ hadoop/common/branches/branch-2.4/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java Mon Feb 24 22:42:14 2014
@@ -40,6 +40,7 @@ import org.apache.hadoop.yarn.conf.YarnC
import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.exceptions.NMNotYetReadyException;
import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
@@ -65,6 +66,7 @@ public class TestNodeManagerResync {
private FileContext localFS;
private CyclicBarrier syncBarrier;
private AtomicBoolean assertionFailedInThread = new AtomicBoolean(false);
+ private AtomicBoolean isNMShutdownCalled = new AtomicBoolean(false);
@Before
public void setup() throws UnsupportedFileSystemException {
@@ -137,6 +139,30 @@ public class TestNodeManagerResync {
Assert.assertFalse(assertionFailedInThread.get());
nm.stop();
}
+
+ @SuppressWarnings("unchecked")
+ @Test(timeout=10000)
+ public void testNMshutdownWhenResyncThrowException() throws IOException,
+ InterruptedException, YarnException {
+ NodeManager nm = new TestNodeManager3();
+ YarnConfiguration conf = createNMConfig();
+ nm.init(conf);
+ nm.start();
+ Assert.assertEquals(1, ((TestNodeManager3) nm).getNMRegistrationCount());
+ nm.getNMDispatcher().getEventHandler()
+ .handle(new NodeManagerEvent(NodeManagerEventType.RESYNC));
+
+ synchronized (isNMShutdownCalled) {
+ while (isNMShutdownCalled.get() == false) {
+ try {
+ isNMShutdownCalled.wait();
+ } catch (InterruptedException e) {
+ }
+ }
+ }
+
+ Assert.assertTrue("NM shutdown not called.",isNMShutdownCalled.get());
+ }
private YarnConfiguration createNMConfig() {
YarnConfiguration conf = new YarnConfiguration();
@@ -322,4 +348,44 @@ public class TestNodeManagerResync {
}
}
}
+
+ class TestNodeManager3 extends NodeManager {
+
+ private int registrationCount = 0;
+
+ @Override
+ protected NodeStatusUpdater createNodeStatusUpdater(Context context,
+ Dispatcher dispatcher, NodeHealthCheckerService healthChecker) {
+ return new TestNodeStatusUpdaterImpl3(context, dispatcher, healthChecker,
+ metrics);
+ }
+
+ public int getNMRegistrationCount() {
+ return registrationCount;
+ }
+
+ @Override
+ protected void shutDown() {
+ synchronized (isNMShutdownCalled) {
+ isNMShutdownCalled.set(true);
+ isNMShutdownCalled.notify();
+ }
+ }
+
+ class TestNodeStatusUpdaterImpl3 extends MockNodeStatusUpdater {
+
+ public TestNodeStatusUpdaterImpl3(Context context, Dispatcher dispatcher,
+ NodeHealthCheckerService healthChecker, NodeManagerMetrics metrics) {
+ super(context, dispatcher, healthChecker, metrics);
+ }
+
+ @Override
+ protected void registerWithRM() throws YarnException, IOException {
+ super.registerWithRM();
+ registrationCount++;
+ if (registrationCount > 1) {
+ throw new YarnRuntimeException("Registration with RM failed.");
+ }
+ }
+ }}
}